In [2]:
# Imports 

import os
import inspect as inspect

# NOTE: the remaining imports keep their original relative order on purpose:
# the star imports below can shadow (or be shadowed by) neighbouring names.
import pandas as pd
import numpy as np
from df2gspread import df2gspread as d2g
import docx
from docx.shared import Inches
from docx.enum.text import WD_ALIGN_PARAGRAPH
from utils import *
from spidercharts import *
from constants import *
from company_profiles import *
import matplotlib.pyplot as plt
import matplotlib as mpl
from math import pi

In [3]:
# Get the data from Google Spreadsheet using the parameters below.
#
# The spreadsheet key (the long id found in the GSheet URL) is a secret and is
# never committed: export it as the GSHEET_SPREADSHEET_KEY environment variable
# before starting the kernel. A missing variable fails fast with a KeyError
# naming it, instead of the cell being unparseable as it was when the value was
# simply blanked out.
copy_df = access_google_spreadsheet(
    scope = "https://spreadsheets.google.com/feeds",
    json_keyfile_name = "Jupyter_meets_GSheet-a279ad757691.json", # This file has to be in the same folder.
    spreadsheet_key = os.environ["GSHEET_SPREADSHEET_KEY"],
    worksheet = "Full_DB") # Tab you want to import

# Keep the full raw sheet, indexed by company name; it is exported as CSV
# together with the derived tables further down the notebook.
raw_df = copy_df.set_index("Company_Name")

# Columns to keep: every score column, plus the company name and its sector.
# A new list is built (rather than extending the helper's return value in
# place) so that re-running this cell can never grow a shared list.
cols_ls = list(columns_to_keep("Score")) + ["Company_Name", "Company_Sector"]

# Create the scores DataFrame, index is name of company
scores_df = copy_df[cols_ls].set_index("Company_Name")


In [4]:
# Column layout of the per-company summary table: one column per section,
# followed by the overall TRAC index and the band label.
col_ls = [
    "Section_1", "Section_2", "Section_3", "Section_4", "Section_5",
    "Section_6", "Section_7", "Section_8", "Section_9", "Section_10",
    "TRAC_Index", "Bands",
]

# Start from a frame with those columns and one row per company.
sections_scores_df = create_sections_scores_df(col_ls, index = scores_df.index)

# Populate the frame (per the original author's note: the average per company
# per section plus the TRAC Index) and then assign each company its band.
# The number of sections is taken from sections_ls; the methodology has 10.
sections_scores_df = assign_bands(
    populate_sections_scores_df(
        scores_df.index,
        len(sections_ls),
        scores_df,
        sections_scores_df,
    )
)


In [5]:
# Attach each company's sector by left-merging the section scores with the
# original sheet on Company_Name, then restore Company_Name as the index.
# NOTE: this rebinds sections_scores_df, so running the cell twice on the same
# kernel merges the sector column a second time - re-run from the top instead.
sections_scores_df = pd.merge(
    sections_scores_df,
    copy_df[['Company_Name', 'Company_Sector']],
    on='Company_Name',
    how='left',
)
sections_scores_df = sections_scores_df.set_index("Company_Name")

# Mean score of every section within each sector, rounded to 2 decimals.
# The ten per-section Series are computed exactly as before but are combined
# with a single pd.concat: DataFrame.append (used previously, once per loop
# iteration) was deprecated in pandas 1.4 and removed in pandas 2.0.
# Result layout: one row per sector, one column per section.
grouped_scores_df_transposed = pd.concat(
    [
        sections_scores_df.groupby("Company_Sector")["Section_{}".format(section_i)].mean().round(2)
        for section_i in range(1, 11)
    ],
    axis=1,
).rename_axis(index=None)  # keep the sector axis unnamed, as the append-based frame was

# Same numbers with sections as rows and sectors as columns.
grouped_scores_df = grouped_scores_df_transposed.transpose()


In [10]:
# Create the folder structure where the results of the analysis will be stored.
# NOTE(review): index[:-1] leaves out the last company in the index - confirm
# that this is intentional.
create_folders_structure(root, sub_folders_ls, sections_scores_df.index[:-1])

# Before storing the frames, swap every NaN for None for better compatibility
# with SQL (just in case).
# NOTE(review): check on the installed pandas version that numeric columns
# really end up holding None after this step.
scores_df, sections_scores_df, grouped_scores_df_transposed = (
    frame.where(frame.notna(), None)
    for frame in (scores_df, sections_scores_df, grouped_scores_df_transposed)
)

# Every DataFrame that gets exported as CSV, keyed by the name handed to
# store_file.
all_dataframes_dict = {
    'scores_df': scores_df,
    'sections_scores_df': sections_scores_df,
    'grouped_scores_df_transposed': grouped_scores_df_transposed,
    'raw_data': raw_df,
}

# Store all dataframes as csv in the corresponding folder.
# sub_folders_ls[0] is "data", see params.py
for key, value in all_dataframes_dict.items():
    store_file(value, key, sub_folders_ls[0])

In [11]:
# Create the company profiles from the raw sheet data and the aggregated
# section scores. Per the original author's note they are saved into the
# per-company folders made by create_folders_structure, so that cell has to
# run first.
create_save_company_profiles(raw_df, sections_scores_df) # It Works! Hurray!

In [8]:
# Prepare the data behind the spider charts from the per-company section scores
# and the per-sector means. Going by the helper's name it also stores those
# frames - TODO confirm in the helper module. The returned dict is passed to
# make_and_save_spidercharts in the next cell.
spider_charts_dfs_dict = create_and_store_df_for_spidercharts(sections_scores_df, grouped_scores_df_transposed)

In [9]:
# Draw the spider charts from the frames prepared in the previous cell and save
# them (presumably into the folder structure created earlier in the notebook -
# verify in the helper module).
make_and_save_spidercharts(spider_charts_dfs_dict)


  mask = r < 0
