In [3]:
import numpy as np
import pandas as pd
import cmd
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.linear_model import LogisticRegression
from IPython.display import clear_output
import matplotlib.pyplot as plt
import random
%matplotlib inline

while True:
    # The case base is re-read from disk at the start of every pass of the loop.
    df = pd.read_csv('DataFrame.csv', index_col = 0, sep = ';')

    # Regression data: columns 1-15 of df are the inputs and columns 16 onwards the targets
    # (both 'ES Product' and 'ES Service' are still present at this point).
    df_input, df_middle = df.iloc[:, 1:16], df.iloc[:, 16:]

    # Classification data: the regression targets followed by a subset of the input columns.
    added_to_second = ['DS Discover', 'DS Define', 'DS Develop', 'DS Deliver', 'ES Product', 'ES Service',
                       'Team size min', 'Part Users', 'Part Experts', 'Part Service staff', 'Part Stakeholders']
    df_middle_second = pd.concat([df_middle, df.loc[:, added_to_second]], axis = 1)
    # The first three columns are doubled before the classifier is trained.
    df_middle_second.iloc[:, :3] = df_middle_second.iloc[:, :3] * 2

    # Technique names are encoded as integers that follow their sorted order.
    mapping_names = dict((name, index) for index, name in enumerate(sorted(df['Name'].unique())))
    df_output = df['Name'].map(mapping_names)

    #user inputs

    def questionnaire(question, answers, error):
        """Prompt until the user types a valid comma-separated list of integers.

        Parameters
        ----------
        question : str
            Text passed to ``input``.
        answers : list or NumPy array of numbers
            Accepted values; every number typed must be one of them.
        error : str
            Message printed after each rejected entry.

        Returns
        -------
        list of [int]
            One single-element list per number typed, in typing order
            (``'1,3'`` -> ``[[1], [3]]``), so a lone answer is read with ``[0][0]``.

        Only a failed integer conversion is treated as bad input; KeyboardInterrupt
        and EOFError propagate so the prompt can always be aborted.
        """
        allowed = set(np.asarray(answers).ravel().tolist())
        while True:
            raw = input(question)
            try:
                numbers = [int(token) for token in raw.split(',')]
            except ValueError:
                numbers = None
            if numbers is not None and all(number in allowed for number in numbers):
                break
            clear_output()
            print(error)
        clear_output()
        return [[number] for number in numbers]

    # questionnaire() returns one [n] per number typed, so single-choice answers are read with [0][0].
    question1 = 'In which stage is the design at the moment?\n(1) Discover\n(2) Define\n(3) Develop\n(4) Deliver\nSelect a number (1/2/3/4): \n'
    error1 = 'Please, provide a number among 1, 2, 3 and 4!\n'
    design_step = questionnaire(question1, [1,2,3,4], error1)[0][0]

    question2 = 'Which innovation level is being pursued?\n(1) Incremental\n(2) Evolutive\n(3) Radical\nSelect a number (1/2/3)\n'
    # The message now matches the 1/2/3 choice (it used to repeat the 0-10 text of error3).
    error2 = 'Please, provide a number among 1, 2 and 3!\n'
    innovation_focus = questionnaire(question2, [1,2,3], error2)[0][0]

    # The prompt states the 0-10 range that the labels, the validation and error3 all use.
    question3 = 'In a scale from 0 to 10, how good in the team\'s relationship?\n(0) Bad\n(10) Good\nChose a number between 0 and 10\n'
    error3 = 'Please, provide an integer number between 0 and 10!\n'
    # The 0-10 answer is divided by 10 before it is stored.
    team_cohesion = questionnaire(question3, np.arange(0,11,1), error3)[0][0]/10

    question4 = 'How many people will participate in this creative effort?\n\n'
    error4 = 'Please, provide an integer number!\n'
    team_size = questionnaire(question4, np.arange(1,10000,1), error4)[0][0]

    question5 = 'Are you developing a Product or a Service?\n(1) Product\n(2) Service\n'
    error5 = 'Please, provide 1 or 2 as answer!\n'
    expected_solution = questionnaire(question5, [1,2], error5)[0][0]

    question6 = 'Is it possible to inclue one of these participants in this creative effort?\n(0) No\n(1) Users\n(2) Experts\n(3) Service Staff\n(4) Stakeholders\nIf more than one, divide them by comas (,)'
    error6 = 'Please, provide numbers!\n'
    # Multi-choice answer: flatten [[1], [3]] into [1, 3].
    participants = [ans[0] for ans in questionnaire(question6, [0,1,2,3,4], error6)]

    #inputs preparation
    # Training cases whose flag for the selected design stage equals 1 are appended a second
    # time to the classifier data. A boolean NumPy mask selects rows by position, so this
    # stays correct even when df's index labels are not exactly 0..n-1.
    stage_mask = (df.iloc[:, design_step] == 1.).to_numpy()
    df_middle_second = pd.concat((df_middle_second, df_middle_second[stage_mask]), axis = 0)
    df_middle_second.reset_index(drop = True, inplace = True)

    df_output = pd.concat((df_output, df_output[stage_mask]), axis = 0)
    df_output.reset_index(drop = True, inplace = True)

    # One-row frame with the same columns as the regression inputs, initialised to 0.
    user_inputs = pd.DataFrame(0., index = ['Results'], columns = df.iloc[:,1:16].columns.values)

    # Column design_step of df corresponds to column design_step-1 here (df[:, 1:16] was sliced).
    user_inputs.iloc[:, design_step-1] = 1.

    # One-hot encoding of the innovation focus; any value other than 1 or 2 means 'IF Radical'.
    focus_column = {1: 'IF Incremental', 2: 'IF Evolutive'}.get(innovation_focus, 'IF Radical')
    user_inputs.loc[:, ['IF Incremental', 'IF Evolutive', 'IF Radical']] = 0.
    user_inputs.loc[:, focus_column] = 1.

    user_inputs.loc[:, 'Team cohesion'] = team_cohesion

    # Linear rescaling of the head count: 1 person -> 0, 6 people -> 1, capped at 1.
    team_size = (team_size - 1.)/(6.-1.)
    user_inputs.loc[:, 'Team size min'] = team_size if team_size <= 1 else 1.

    # Keep only the column of the requested solution type; the other one is removed from the
    # user inputs, from both training sets and from the list of classifier extras.
    if expected_solution == 1:
        kept_solution, dropped_solution = 'ES Product', 'ES Service'
    else:
        kept_solution, dropped_solution = 'ES Service', 'ES Product'
    user_inputs = user_inputs.drop(dropped_solution, axis = 1)
    df_input = df_input.drop(dropped_solution, axis = 1)
    df_middle_second = df_middle_second.drop(dropped_solution, axis = 1)
    added_to_second.remove(dropped_solution)
    user_inputs.loc[:, kept_solution] = 1.

    # Flag every selected participant group. 0 ("No") has no column, so it is skipped even
    # when it was typed together with other numbers (this used to raise KeyError).
    participant_columns = {1: 'Part Users', 2: 'Part Experts', 3: 'Part Service staff', 4: 'Part Stakeholders'}
    for part in participants:
        if part in participant_columns:
            user_inputs.loc[:, participant_columns[part]] = 1.

    #ML models
    #regression: one gradient-boosting model per column of df_middle, all trained on df_input

    X_rgr = df_input
    rgr = {}
    for feature in df_middle.columns:
        y_rgr = df_middle[feature]
        estimator = GradientBoostingRegressor(n_estimators = 25, max_depth = 5, learning_rate = 0.4)
        rgr[feature] = estimator.fit(X_rgr, y_rgr)

    #classification: technique code as a function of the df_middle_second columns

    X_clf, y_clf = df_middle_second, df_output

    clf = LogisticRegression(multi_class = 'multinomial', solver = 'newton-cg', C = 0.7)
    clf.fit(X_clf, y_clf)

    #predictions

    #1st prediction: each regressor's estimate for the user's answers, stored in a one-row frame
    predict_first = pd.DataFrame(index = ['Results'], columns = df_middle.columns.values)
    for feature, model in rgr.items():
        predict_first.loc['Results', feature] = model.predict(user_inputs)[0]

    #append the user answers the classifier was also trained on, then make every column numeric
    extra_inputs = user_inputs[added_to_second]
    predict_first = pd.concat([predict_first, extra_inputs], axis = 1).apply(pd.to_numeric, errors = 'coerce')

    #2nd prediction: class probabilities, one column per technique name in sorted order
    technique_names = sorted(df['Name'].unique())
    predict_second = pd.DataFrame(index = ['Results'], columns = technique_names)
    predict_second.loc[:,:] = clf.predict_proba(predict_first)

    #predict_second[df['Name'][df.iloc[:,design_step]!=0].unique()] *= 1.25

    #results

    results = pd.DataFrame(index = ['Results'], columns = ['First', 'proba1', 'Second', 'proba2', 'Third', 'proba3'])

    # Rank the techniques by predicted probability (highest first) and keep the top three.
    ranked = predict_second.sort_values(by = ['Results'], axis = 1, ascending = False)
    best_three = ranked.iloc[:, :3].copy()
    probas = ranked.iloc[0].tolist()

    # Tie-break for third place. As before, it only applies when the first three ranks are not
    # tied among themselves and the third (non-zero) probability equals the fourth. The draw
    # is now limited to the techniques that share the third-place probability; it used to
    # include every tied pair further down the ranking, which could promote a less probable one.
    top_is_tie_free = len(probas) > 3 and probas[0] != probas[1] and probas[1] != probas[2]
    if top_is_tie_free and probas[2] != 0 and probas[2] == probas[3]:
        tied_positions = [pos for pos in range(2, len(probas)) if probas[pos] == probas[2]]
        draw = random.choice(tied_positions)
        best_three = best_three.drop(best_three.columns[2], axis = 1)
        best_three[ranked.columns[draw]] = ranked.iloc[0, draw]

    results.loc[:,:] = [best_three.columns[0], best_three.iloc[0,0], best_three.columns[1], best_three.iloc[0,1],
                        best_three.columns[2], best_three.iloc[0,2]]

    # Remove (name, probability) pairs below the 1% tolerance; walking from the last pair to
    # the first keeps the remaining positions valid after each drop.
    for column in [5,3,1]:
        if results.iloc[0, column] < 0.01:
            results = results.drop([results.columns[column-1], results.columns[column]], axis = 1)

    #list of adequate CITs
    print('Adequate techniques (probabilities)')
    for column in results.columns[::2]:
        position = results.columns.get_loc(column)
        # Explicit .iloc replaces Series[0], whose positional fallback is deprecated in pandas.
        print('{} technique: {} ({:.0f}%)'.format(column, results.iloc[0, position], float(results.iloc[0, position+1])*100))

    #CITs characteristics
    print('\n\nMain characteristics of adequate techniques')
    print('Creative process stage: {}'.format(user_inputs.iloc[:,design_step-1].name.split()[-1]))
    # Predicted characteristics are rounded to one decimal and shown as a score out of 10.
    for label, characteristic in [('Required execution time', 'Execution time'),
                                  ('Difficulty of use', 'Difficulty of use'),
                                  ('Technique structure', 'CIT Structure')]:
        print('{}: {}/10'.format(label, int(round(predict_first[characteristic].iloc[0],1)*10)))
    # The creative component with the highest predicted value; only the last word of the column name is shown.
    creative_components = ['CC Users','CC Organization','CC Business','CC Frame','CC Concepts','CC Prototypes']
    print('Creative component to focus: {}'.format(predict_first[creative_components].idxmax(axis=1).iloc[0].split()[-1]))

    #finding the importance of each user input to the inference process
    # Sum the feature importances of all fitted regressors, one value per df_input column.
    feature_importances = np.zeros(df_input.shape[1])
    for model in rgr.values():
        feature_importances = feature_importances + model.feature_importances_
    feature_importances = pd.DataFrame(feature_importances.reshape(1,-1), index = [0], columns = df_input.columns.values)

    columns = ['Design step', 'Innovation focus', 'Team cohesion', 'Team size', 'Expected solution', 'Participants']

    # Aggregate the per-column importances into one value per question.
    importance_row = feature_importances.iloc[0]
    DS = importance_row[['DS Discover', 'DS Define', 'DS Develop', 'DS Deliver']].sum()
    IF = importance_row[['IF Incremental', 'IF Evolutive', 'IF Radical']].sum()
    TC = importance_row['Team cohesion']
    TS = importance_row['Team size min']
    # Only one of the two ES columns is present; an explicit membership test replaces the
    # former bare try/except.
    es_column = 'ES Product' if 'ES Product' in importance_row.index else 'ES Service'
    ES = importance_row[es_column]
    Pa = importance_row[['Part Users', 'Part Experts', 'Part Service staff', 'Part Stakeholders']].sum()

    # NOTE(review): the design-step importance is doubled, as in the original code; the
    # reason is not visible here - confirm.
    factor_importances = np.array([DS*2, IF, TC, TS, ES, Pa])
    factor_importances = pd.DataFrame(factor_importances.reshape(1,-1), index = [0], columns = columns)

    print('\n\nImportance of each question to the decision process:')
    labels = factor_importances.columns
    heights = [round(float(v),1) for v in factor_importances.values[0]]
    plt.bar(x = labels, height = heights)
    plt.xticks(rotation = 45)
    # Write each (already rounded) value just above its bar.
    for i, v in enumerate(heights):
        plt.text(i-0.4, v+0.05, str(v), color='black', rotation = 0)
    plt.show()
    
    retry = input('Retry? (y/n) ')
    if retry == 'n':
        retention_question = 'Please, if the technique you chose was helpful, inform the number of the one you chose.\nIf none was useful, type 0.\n'
        retention_error = 'Please, provide a number as answer!\n'
        retention = questionnaire(retention_question, [0,1,2,3], retention_error)
        if retention[0][0] != 0:
            chosen_technique = results[results.columns[::2]].iloc[:,retention[0][0]-1]

            case_retention = pd.concat((user_inputs, predict_first.iloc[:,:9]), axis = 1)
            if expected_solution == 1:
                case_retention['ES Service'] = 0;
            elif expected_solution == 2:
                case_retention['ES Product'] = 0;
            case_retention['Name'] = chosen_technique
            case_retention = case_retention.reindex(df.columns.values, axis = 1)
            df = df.append(case_retention).reset_index(drop = True)
            df.to_csv('DataFrame.csv', sep = ';')
        break
    clear_output()
    plt.clf()

In [4]:
# Display the case base left by the cell above: the frame read from 'DataFrame.csv',
# including the retained case when one was appended before the loop ended.
df

Unnamed: 0,Name,DS Discover,DS Define,DS Develop,DS Deliver,IF Incremental,IF Evolutive,IF Radical,Team cohesion,Team size min,...,Part Stakeholders,Execution time,Difficulty of use,CIT Structure,CC Users,CC Organization,CC Business,CC Frame,CC Concepts,CC Prototypes
0,5 Whys,1.0,0.0,0.0,0.0,0.5,0.0,0.0,0.9,0.0,...,0.00,0.000000,0.100000,0.000000,0.500000,0.500000,0.000000,0.500000,0.000000,0.000000
1,Contextual Interview,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.5,0.0,...,1.00,0.400000,0.800000,0.300000,1.000000,1.000000,0.000000,0.000000,0.000000,0.000000
2,Functional Analysis,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.1,0.0,...,0.00,0.400000,0.600000,0.800000,0.000000,0.500000,0.000000,1.000000,0.250000,0.000000
3,How Might We,1.0,0.0,0.0,0.0,0.5,0.0,0.0,0.7,0.2,...,0.00,0.400000,0.600000,0.000000,0.250000,0.250000,0.250000,1.000000,0.000000,0.000000
4,Mind Map,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.5,0.0,...,0.25,0.200000,0.200000,0.300000,0.500000,0.500000,0.000000,0.250000,0.750000,0.000000
5,Shadowing,1.0,0.0,0.0,0.0,0.5,0.0,0.0,0.5,0.0,...,0.00,0.700000,0.500000,0.300000,1.000000,0.250000,0.250000,0.000000,0.000000,0.000000
6,Stakeholder Map,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.4,0.0,...,0.50,0.200000,0.100000,0.300000,1.000000,1.000000,1.000000,0.250000,0.000000,0.000000
7,Traditional Brainstorming,1.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.8,...,0.00,0.200000,0.500000,0.300000,0.250000,0.250000,0.250000,0.250000,1.000000,0.000000
8,5 Whys,0.0,1.0,0.0,0.0,0.5,0.0,0.0,0.9,0.0,...,0.00,0.000000,0.100000,0.000000,0.500000,0.500000,0.000000,0.500000,0.000000,0.000000
9,Affinity Diagram,0.0,1.0,0.0,0.0,0.5,0.0,0.0,0.7,0.6,...,0.00,0.300000,0.400000,0.400000,0.000000,0.000000,0.000000,0.750000,0.750000,0.000000
