#### Templates for the graphs

##### Stacked barchart

In [4]:
def stacked_barchart(columns, title='', order=None):
    """Show a horizontal stacked bar chart of comma-separated answers.

    Each Series in *columns* becomes one bar. Every non-missing cell is split
    on commas, all spaces are removed from the resulting labels, and the
    occurrences of each label are counted. Each distinct label becomes one
    stacked segment (one Plotly trace).

    Args:
        columns: Iterable of pandas Series whose non-missing cells are
            comma-separated strings. The ``name`` of each Series labels its bar.
        title: Figure title.
        order: Bar labels (Series names) in the desired y-axis order; passed
            to Plotly as ``categoryarray``. ``None`` behaves like an empty list.

    Returns:
        None. The figure is rendered with ``fig.show()``.

    Raises:
        AttributeError: If a non-missing cell is not a string (it has no
            ``split`` method).
    """
    from collections import Counter

    import pandas as pd
    import plotly.graph_objects as go

    order = [] if order is None else list(order)

    # One row of label counts per input Series.
    bar_names = []
    label_counts = []
    for series in columns:
        counts = Counter(
            label.replace(' ', '')  # spaces are dropped so 'a, b' and 'a,b' agree
            for cell in series.dropna()
            for label in cell.split(',')
        )
        bar_names.append(series.name)
        label_counts.append(dict(counts))

    # Rows are bars, columns are labels in order of first appearance; a label
    # that a Series never mentions is NaN in that row.
    df = pd.DataFrame(label_counts, index=pd.Index(bar_names, name='kinds'))

    # Optional export, kept disabled: df.to_csv(title + '.csv')

    # Creating the stacked bar chart: one trace per label.
    fig = go.Figure()
    for label in df.columns:
        fig.add_trace(go.Bar(name=label, y=df.index, x=df[label],
                             orientation='h', text=df[label]))

    # Layout is independent of the traces, so it is applied once, and it is
    # applied even when there is nothing to plot.
    fig.update_layout(
        barmode='stack',
        title=title,
        title_x=0.5,
        title_y=0.9,
        height=500,
        font=dict(family='Helvetica', color="Black", size=16),
        legend=dict(title_font_family='Helvetica',
                    font=dict(size=16, color="Black")),
    )
    fig.update_yaxes(categoryorder='array', categoryarray=order)

    # Optional export, kept disabled: fig.write_image('image' + title + '.svg')
    return fig.show()

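    # NOTE: static image export (fig.write_image) needs the kaleido package. Install it once in the environment with "pip install -U kaleido" rather than from inside this function.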
    

##### Percentage stacked bar charts 

In [5]:
def percentage_stackedcharts(columns, title='', order=None):
    """Show a vertical 100 % stacked bar chart of answer shares.

    Each Series in *columns* becomes one bar. The distinct values of a Series
    are counted with ``value_counts`` and converted to percentages of that
    Series' own total, so every bar stacks to 100 %. Percentages are rounded
    to one decimal and printed on the segments.

    Args:
        columns: Iterable of pandas Series. The ``name`` of each Series labels
            its bar on the x axis.
        title: Figure title.
        order: Bar labels (Series names) in the desired x-axis order; passed
            to Plotly as ``categoryarray``. ``None`` behaves like an empty list.

    Returns:
        None. The figure is rendered with ``fig.show()``.
    """
    import pandas as pd
    import plotly.graph_objects as go

    order = [] if order is None else list(order)

    # Name every count Series after its source explicitly: the default name
    # returned by value_counts() differs between pandas 1.x and 2.x.
    per_question = [series.value_counts().rename(series.name) for series in columns]
    if per_question:
        # Rows are bars (questions), columns are answer categories.
        counts = pd.concat(per_question, axis=1).transpose()
    else:
        counts = pd.DataFrame()

    # Spaces are removed from the category labels used in the legend.
    counts.columns = [str(label).replace(' ', '') for label in counts.columns]

    # Each row is divided by its own total; sum() skips categories that are
    # missing (NaN) for a given question.
    row_totals = counts.sum(axis=1)
    percentages = (counts.div(row_totals, axis=0) * 100).round(1)

    # Optional export, kept disabled: counts.to_csv(title + '.csv')

    # Creating the chart: one trace per answer category.
    fig = go.Figure()
    for label, values in percentages.items():
        # A category that a question never received is left unlabelled
        # instead of being printed as 'nan%'.
        labels = ['' if pd.isna(val) else f'{val}%' for val in values]
        fig.add_trace(go.Bar(name=label, x=percentages.index, y=values, text=labels))

    fig.update_layout(width=800, height=700, barmode='stack')
    fig.update_yaxes(title='Percent')
    fig.update_xaxes(categoryorder='array', categoryarray=order)
    fig.update_layout(
        title=title,
        title_x=0.5,
        font=dict(family='Helvetica', color="Black", size=16),
        legend=dict(title_font_family='Helvetica',
                    font=dict(size=16, color="Black")),
    )

    # Optional export, kept disabled: fig.write_image('image' + title + '.svg')
    return fig.show()
    

##### Wordcloud

In [6]:
# Builds a word cloud from one free-text survey column and saves it as a PNG.
# Input: a single dataframe column (pandas Series) whose name is the survey question.
def wordcloud(df, extra_stopwords=None):
    """Draw a word cloud of the text in a Series and save it as a PNG.

    The non-missing entries are lower-cased, the punctuation characters
    ``- . / ? ! , " : ; ( ) '`` are replaced by spaces, and the result is
    joined into one text. The words of the Series name, the ``wordcloud``
    package's ``STOPWORDS`` and *extra_stopwords* are excluded from the cloud.
    The image is drawn on the current matplotlib figure, titled with the
    Series name, and written to ``'Image-' + name + '.png'`` in the working
    directory.

    Args:
        df: pandas Series of strings (despite the parameter name); its
            ``name`` is used for the title and the output file name.
        extra_stopwords: Additional words to exclude. ``None`` means none.

    Returns:
        None.
    """
    import re

    from wordcloud import WordCloud, STOPWORDS
    import matplotlib.pyplot as plt

    punctuation = r'[-./?!,":;()\']'
    question = '' if df.name is None else str(df.name)
    extra = [] if extra_stopwords is None else list(extra_stopwords)

    # regex=True is explicit because pandas 2.x treats the pattern as a
    # literal string by default, which would leave all punctuation in place.
    responses = df.str.lower().dropna()
    text = ' '.join(responses.str.replace(punctuation, ' ', regex=True))

    # The question words get the same punctuation clean-up as the responses;
    # otherwise a word such as 'interest?' could never match a token.
    question_words = re.sub(punctuation, ' ', question).split()
    stopwords_new = set(question_words) | set(STOPWORDS) | set(extra)

    wc_image = WordCloud(stopwords=stopwords_new, background_color='white',
                         width=600, height=600, random_state=4).generate(text)
    plt.imshow(wc_image)
    plt.tight_layout()
    plt.title(question)
    plt.axis('off')
    # NOTE(review): the Series name is used verbatim in the file name; names
    # containing path separators such as '/' will not save as intended.
    plt.savefig('Image-' + question + '.png')