### Dependencies 

In [40]:
import pandas as pd
from sqlalchemy import create_engine, inspect
from db_config import password
import psycopg2
import json
import numpy as np

### Importing the CSV files

In [2]:
#Behavior_and_Attitudes
file = "./Resources/Behavior_and_Attitudes.csv"
Behavior_and_Attitudes= pd.read_csv(file)
Behavior_and_Attitudes.head()



Unnamed: 0,code,economy,year,Perceived opportunities,Perceived capabilities,Fear of failure rate *,Entrepreneurial intentions,Total early-stage Entrepreneurial Activity (TEA),Established Business Ownership,Entrepreneurial Employee Activity,Motivational Index,Female/Male TEA,Female/Male Opportunity-Driven TEA,High Job Creation Expectation,Innovation,Business Services Sector,High Status to Successful Entrepreneurs,Entrepreneurship as a Good Career Choice
0,374,Armenia,2019,53.89,70.0,48.22,32.2,20.97,7.84,0.59,,0.64,,30.55,,7.55,73.41,87.24
1,61,Australia,2019,45.74,56.04,47.37,13.02,10.52,6.53,8.28,,0.72,,24.64,,26.31,74.0,56.39
2,375,Belarus,2019,29.52,42.34,37.96,6.59,5.78,2.72,0.49,,0.82,,28.23,,10.16,69.9,70.3
3,55,Brazil,2019,46.37,61.99,35.57,30.17,23.3,16.16,0.62,,0.98,,8.93,,7.57,72.25,75.25
4,101,Canada,2019,67.08,56.8,47.17,11.86,18.16,7.44,5.4,,0.71,,21.18,,12.24,79.9,69.21


In [3]:
#framework_conditions

df_data = pd.DataFrame(pd.read_csv("./Resources/Framework_Conditions.csv"))
df_data.head()

Unnamed: 0,code,economy,year,Financing for entrepreneurs,Governmental support and policies,Taxes and bureaucracy,Governmental programs,Basic school entrepreneurial education and training,Post school entrepreneurial education and training,R&D transfer,Commercial and professional infrastructure,Internal market dynamics,Internal market openness,Physical and services infrastructure,Cultural and social norms
0,213,Algeria,2011,2.91,3.16,2.63,3.11,2.22,2.67,2.73,3.12,3.4,2.59,3.18,2.99
1,213,Algeria,2012,3.24,3.29,2.8,3.07,2.19,3.32,2.81,2.83,3.75,3.04,3.33,3.17
2,213,Algeria,2013,3.42,3.19,2.56,2.75,2.45,3.16,2.88,2.86,4.0,2.97,3.47,3.19
3,244,Angola,2010,2.11,2.31,2.16,1.97,1.81,2.41,1.72,2.39,2.76,2.0,2.28,2.84
4,244,Angola,2012,2.74,2.74,2.19,2.16,1.93,2.24,1.71,2.65,2.94,2.15,2.28,2.94


## Data Munging

### Behaviour and Attitudes

#### No of unique countries in the Survey

In [4]:
print(f"No of unique countries in the survey : {len(Behavior_and_Attitudes['economy'].unique())}")


No of unique countries in the survey : 113


#### Understanding the Number of economies every year. 

We could see that not every country is been surveyed in all the years. 2001 was the year with minimum countries(28) in the survey and 2013 and 2014 had 70 countries participating. The latest year 2019 have 50 economies surveyed.

In [5]:
Behavior_and_Attitudes["year"].value_counts()

2014    70
2013    70
2012    67
2016    64
2015    60
2010    59
2011    55
2009    54
2017    54
2019    50
2018    49
2008    43
2007    42
2006    42
2002    37
2005    35
2004    34
2003    31
2001    28
Name: year, dtype: int64

#### Null Values

The dataset have null values in certain column and that has been identified below. The columns with null values are,
1. Fear of failure rate * 
2. Entrepreneurial intentions
3. Established Business Ownership 
4. Entrepreneurial Employee Activity
5. Motivational Index 
6. Female/Male Opportunity-Driven TEA 
7. High Job Creation Expectation
8. Innovation
9. Business Services Sector
10. High Status to Successful Entrepreneurs
11. Entrepreneurship as a Good Career Choice 

In [6]:
# identifying missing values
Behavior_and_Attitudes.count()

code                                                944
economy                                             944
year                                                944
Perceived opportunities                             944
Perceived capabilities                              944
Fear of failure rate *                              943
Entrepreneurial intentions                          916
Total early-stage Entrepreneurial Activity (TEA)    944
Established Business Ownership                      943
Entrepreneurial Employee Activity                   458
Motivational Index                                  548
Female/Male TEA                                     944
Female/Male Opportunity-Driven TEA                  367
High Job Creation Expectation                       941
Innovation                                          489
Business Services Sector                            906
High Status to Successful Entrepreneurs             837
Entrepreneurship as a Good Career Choice        

### Fear of failure rate--dealing with null value

In [7]:
# Fear of failure rate has just one null value. identifying the row or economy with null value

Behavior_and_Attitudes.loc[Behavior_and_Attitudes["Fear of failure rate *"].isna()]

Unnamed: 0,code,economy,year,Perceived opportunities,Perceived capabilities,Fear of failure rate *,Entrepreneurial intentions,Total early-stage Entrepreneurial Activity (TEA),Established Business Ownership,Entrepreneurial Employee Activity,Motivational Index,Female/Male TEA,Female/Male Opportunity-Driven TEA,High Job Creation Expectation,Innovation,Business Services Sector,High Status to Successful Entrepreneurs,Entrepreneurship as a Good Career Choice
736,582,Venezuela,2007,56.04,66.24,,20.68,20.16,5.39,,,0.72,,25.62,,6.72,71.7,76.57


In [8]:
# pulling all the data point related to Venezuela
Behavior_and_Attitudes.loc[Behavior_and_Attitudes["economy"]=="Venezuela"]

Unnamed: 0,code,economy,year,Perceived opportunities,Perceived capabilities,Fear of failure rate *,Entrepreneurial intentions,Total early-stage Entrepreneurial Activity (TEA),Established Business Ownership,Entrepreneurial Employee Activity,Motivational Index,Female/Male TEA,Female/Male Opportunity-Driven TEA,High Job Creation Expectation,Innovation,Business Services Sector,High Status to Successful Entrepreneurs,Entrepreneurship as a Good Career Choice
538,582,Venezuela,2011,48.45,66.86,24.15,20.23,15.43,1.57,0.63,1.52,0.88,,,13.44,6.49,77.26,83.06
650,582,Venezuela,2009,48.21,59.31,25.61,28.7,18.66,6.51,,,0.91,,23.78,,,68.88,76.24
736,582,Venezuela,2007,56.04,66.24,,20.68,20.16,5.39,,,0.72,,25.62,,6.72,71.7,76.57
813,582,Venezuela,2005,65.15,74.94,30.95,40.0,24.95,8.59,,,0.91,,30.0,,8.67,77.0,84.32
878,582,Venezuela,2003,43.04,82.14,30.73,37.28,26.81,9.63,,,0.88,,28.36,,10.08,73.01,79.67


#### Treating the one null value in Fear of Failure rate
Since there are five data points, the one null value can be filled by calculating the mean from four other fear of 
failure rate data.

In [9]:
#calculating mean failure rate

mean_ffrate=Behavior_and_Attitudes.loc[(Behavior_and_Attitudes["economy"]=="Venezuela") & (Behavior_and_Attitudes["year"]!=2007),:]["Fear of failure rate *"].mean()

print(f"The data is updated with the mean value {mean_ffrate}")

# adding it to the df

Behavior_and_Attitudes["Fear of failure rate *"]=Behavior_and_Attitudes["Fear of failure rate *"].fillna(mean_ffrate)

#Displaying the DF with the changes made

Behavior_and_Attitudes.loc[Behavior_and_Attitudes["economy"]=="Venezuela"]

The data is updated with the mean value 27.86


Unnamed: 0,code,economy,year,Perceived opportunities,Perceived capabilities,Fear of failure rate *,Entrepreneurial intentions,Total early-stage Entrepreneurial Activity (TEA),Established Business Ownership,Entrepreneurial Employee Activity,Motivational Index,Female/Male TEA,Female/Male Opportunity-Driven TEA,High Job Creation Expectation,Innovation,Business Services Sector,High Status to Successful Entrepreneurs,Entrepreneurship as a Good Career Choice
538,582,Venezuela,2011,48.45,66.86,24.15,20.23,15.43,1.57,0.63,1.52,0.88,,,13.44,6.49,77.26,83.06
650,582,Venezuela,2009,48.21,59.31,25.61,28.7,18.66,6.51,,,0.91,,23.78,,,68.88,76.24
736,582,Venezuela,2007,56.04,66.24,27.86,20.68,20.16,5.39,,,0.72,,25.62,,6.72,71.7,76.57
813,582,Venezuela,2005,65.15,74.94,30.95,40.0,24.95,8.59,,,0.91,,30.0,,8.67,77.0,84.32
878,582,Venezuela,2003,43.04,82.14,30.73,37.28,26.81,9.63,,,0.88,,28.36,,10.08,73.01,79.67


### Entrepreneural intentions--dealing with null value

All the economies that was surveyed in 2001 might not have questions on entrepreneurial intentions and hence the datapoint is null for all 28 economies.


In [10]:
#identifying the null values

Behavior_and_Attitudes.loc[Behavior_and_Attitudes["Entrepreneurial intentions"].isna()]

Unnamed: 0,code,economy,year,Perceived opportunities,Perceived capabilities,Fear of failure rate *,Entrepreneurial intentions,Total early-stage Entrepreneurial Activity (TEA),Established Business Ownership,Entrepreneurial Employee Activity,Motivational Index,Female/Male TEA,Female/Male Opportunity-Driven TEA,High Job Creation Expectation,Innovation,Business Services Sector,High Status to Successful Entrepreneurs,Entrepreneurship as a Good Career Choice
916,54,Argentina,2001,19.83,54.8,37.17,,9.92,3.92,,,0.38,,17.81,,16.72,,
917,61,Australia,2001,31.27,59.66,38.18,,14.68,27.96,,,0.48,,28.79,,30.32,,
918,32,Belgium,2001,19.96,30.32,37.87,,4.19,3.02,,,0.51,,13.54,,18.85,,
919,55,Brazil,2001,40.6,54.28,32.14,,13.8,3.79,,,0.62,,14.44,,9.7,,
920,101,Canada,2001,34.54,53.03,26.89,,10.27,3.89,,,0.63,,12.37,,27.06,,
921,45,Denmark,2001,45.78,40.81,23.72,,7.23,4.25,,,0.43,,3.48,,43.89,,
922,358,Finland,2001,54.91,37.61,35.17,,8.16,7.46,,,0.54,,19.09,,35.07,,
923,33,France,2001,6.89,20.01,28.71,,5.72,1.62,,,0.43,,8.37,,24.89,,
924,49,Germany,2001,23.65,30.11,41.75,,6.28,4.18,,,0.44,,17.44,,32.0,,
925,36,Hungary,2001,8.63,55.3,10.44,,10.86,5.9,,,0.57,,15.79,,20.25,,


### Established Business Ownership- null values

Replaced the single value with the closest data point.

In [11]:
Behavior_and_Attitudes.loc[Behavior_and_Attitudes['Established Business Ownership'].isna()]

Unnamed: 0,code,economy,year,Perceived opportunities,Perceived capabilities,Fear of failure rate *,Entrepreneurial intentions,Total early-stage Entrepreneurial Activity (TEA),Established Business Ownership,Entrepreneurial Employee Activity,Motivational Index,Female/Male TEA,Female/Male Opportunity-Driven TEA,High Job Creation Expectation,Innovation,Business Services Sector,High Status to Successful Entrepreneurs,Entrepreneurship as a Good Career Choice
928,972,Israel,2001,17.98,35.64,33.19,,5.29,,,,0.28,,47.47,,52.21,,


In [12]:
Behavior_and_Attitudes.loc[Behavior_and_Attitudes['economy']=='Israel']

Unnamed: 0,code,economy,year,Perceived opportunities,Perceived capabilities,Fear of failure rate *,Entrepreneurial intentions,Total early-stage Entrepreneurial Activity (TEA),Established Business Ownership,Entrepreneurial Employee Activity,Motivational Index,Female/Male TEA,Female/Male Opportunity-Driven TEA,High Job Creation Expectation,Innovation,Business Services Sector,High Status to Successful Entrepreneurs,Entrepreneurship as a Good Career Choice
18,972,Israel,2019,46.0,43.34,55.36,21.2,12.69,5.45,5.75,,0.69,,21.73,,27.06,84.13,64.21
70,972,Israel,2018,56.23,41.48,47.47,26.2,12.7,4.2,7.21,3.3,0.7,1.1,22.9,32.9,7.3,84.98,65.96
121,972,Israel,2017,58.29,44.14,47.96,26.42,12.78,3.32,8.55,2.02,0.72,1.0,8.66,26.7,27.3,86.07,65.16
183,972,Israel,2016,53.69,41.1,48.65,20.61,11.31,4.0,7.3,2.6,0.71,1.15,22.1,30.4,37.37,85.5,64.2
243,972,Israel,2015,55.5,41.56,47.76,21.59,11.82,3.9,6.55,3.29,0.65,1.02,23.6,30.78,32.9,86.24,64.48
374,972,Israel,2013,46.5,36.17,51.76,23.97,10.04,5.94,,2.83,0.48,1.03,23.84,34.13,31.91,80.3,60.61
445,972,Israel,2012,30.62,29.31,46.76,12.81,6.53,3.78,4.24,2.41,0.72,,21.33,29.15,24.05,72.39,59.47
564,972,Israel,2010,33.88,39.94,46.71,13.45,5.02,3.25,,2.24,0.5,,32.1,,26.9,73.23,60.12
619,972,Israel,2009,28.99,38.27,37.27,13.62,6.07,4.27,,,0.52,,32.43,,,73.24,61.4
674,972,Israel,2008,24.78,37.83,44.81,14.19,6.36,4.12,,,0.46,,23.77,,33.65,74.13,56.23


In [13]:
#Replacing with the closest value.
Behavior_and_Attitudes["Established Business Ownership"]=Behavior_and_Attitudes["Established Business Ownership"].fillna(5.66)
Behavior_and_Attitudes.loc[Behavior_and_Attitudes['economy']=='Israel']

Unnamed: 0,code,economy,year,Perceived opportunities,Perceived capabilities,Fear of failure rate *,Entrepreneurial intentions,Total early-stage Entrepreneurial Activity (TEA),Established Business Ownership,Entrepreneurial Employee Activity,Motivational Index,Female/Male TEA,Female/Male Opportunity-Driven TEA,High Job Creation Expectation,Innovation,Business Services Sector,High Status to Successful Entrepreneurs,Entrepreneurship as a Good Career Choice
18,972,Israel,2019,46.0,43.34,55.36,21.2,12.69,5.45,5.75,,0.69,,21.73,,27.06,84.13,64.21
70,972,Israel,2018,56.23,41.48,47.47,26.2,12.7,4.2,7.21,3.3,0.7,1.1,22.9,32.9,7.3,84.98,65.96
121,972,Israel,2017,58.29,44.14,47.96,26.42,12.78,3.32,8.55,2.02,0.72,1.0,8.66,26.7,27.3,86.07,65.16
183,972,Israel,2016,53.69,41.1,48.65,20.61,11.31,4.0,7.3,2.6,0.71,1.15,22.1,30.4,37.37,85.5,64.2
243,972,Israel,2015,55.5,41.56,47.76,21.59,11.82,3.9,6.55,3.29,0.65,1.02,23.6,30.78,32.9,86.24,64.48
374,972,Israel,2013,46.5,36.17,51.76,23.97,10.04,5.94,,2.83,0.48,1.03,23.84,34.13,31.91,80.3,60.61
445,972,Israel,2012,30.62,29.31,46.76,12.81,6.53,3.78,4.24,2.41,0.72,,21.33,29.15,24.05,72.39,59.47
564,972,Israel,2010,33.88,39.94,46.71,13.45,5.02,3.25,,2.24,0.5,,32.1,,26.9,73.23,60.12
619,972,Israel,2009,28.99,38.27,37.27,13.62,6.07,4.27,,,0.52,,32.43,,,73.24,61.4
674,972,Israel,2008,24.78,37.83,44.81,14.19,6.36,4.12,,,0.46,,23.77,,33.65,74.13,56.23


### Entrepreneurial employee activity, Motivational Index , Female/Male Opportunity-Driven TEA ,Innovation,High Status to Successful Entrepreneurs, Entrepreneurship as a Good Career Choice, Business Services Sector, High Job Creation Expectation --missing values
These columns have more than 100 missing values and will be only used for plotting purposes.

In [14]:
print(f"Missing values in Entrepreneurial Employee Activity is :{len(Behavior_and_Attitudes.loc[Behavior_and_Attitudes['Entrepreneurial Employee Activity'].isna()])}")
print(f"Missing values in Motivational Index is :{len(Behavior_and_Attitudes.loc[Behavior_and_Attitudes['Motivational Index'].isna()])}")
print(f"Missing values in Female/Male Opportunity-Driven TEA is :{len(Behavior_and_Attitudes.loc[Behavior_and_Attitudes['Female/Male Opportunity-Driven TEA'].isna()])}")
print(f"Missing values in Innovation is :{len(Behavior_and_Attitudes.loc[Behavior_and_Attitudes['Innovation'].isna()])}")
print(f"Missing values in High Status to Successful Entrepreneurs is :{len(Behavior_and_Attitudes.loc[Behavior_and_Attitudes['High Status to Successful Entrepreneurs'].isna()])}")
print(f"Missing values in Entrepreneurship as a Good Career Choice is :{len(Behavior_and_Attitudes.loc[Behavior_and_Attitudes['Entrepreneurship as a Good Career Choice'].isna()])}")
print(f"Missing values in  Business Services Sector is :{len(Behavior_and_Attitudes.loc[Behavior_and_Attitudes['Business Services Sector'].isna()])}")
print(f"Missing values in High Job Creation Expectation is :{len(Behavior_and_Attitudes.loc[Behavior_and_Attitudes['High Job Creation Expectation'].isna()])}")


Missing values in Entrepreneurial Employee Activity is :486
Missing values in Motivational Index is :396
Missing values in Female/Male Opportunity-Driven TEA is :577
Missing values in Innovation is :455
Missing values in High Status to Successful Entrepreneurs is :107
Missing values in Entrepreneurship as a Good Career Choice is :110
Missing values in  Business Services Sector is :38
Missing values in High Job Creation Expectation is :3


### Changing column names

In [15]:
Behavior_and_Attitudes.columns = Behavior_and_Attitudes.columns.str.replace(' ','_')
Behavior_and_Attitudes.head()

Unnamed: 0,code,economy,year,Perceived_opportunities,Perceived_capabilities,Fear_of_failure_rate_*,Entrepreneurial_intentions,Total_early-stage_Entrepreneurial_Activity_(TEA),Established_Business_Ownership,Entrepreneurial_Employee_Activity,Motivational_Index,Female/Male_TEA,Female/Male_Opportunity-Driven_TEA,High_Job_Creation_Expectation,Innovation,Business_Services_Sector,High_Status_to_Successful_Entrepreneurs,Entrepreneurship_as_a_Good_Career_Choice
0,374,Armenia,2019,53.89,70.0,48.22,32.2,20.97,7.84,0.59,,0.64,,30.55,,7.55,73.41,87.24
1,61,Australia,2019,45.74,56.04,47.37,13.02,10.52,6.53,8.28,,0.72,,24.64,,26.31,74.0,56.39
2,375,Belarus,2019,29.52,42.34,37.96,6.59,5.78,2.72,0.49,,0.82,,28.23,,10.16,69.9,70.3
3,55,Brazil,2019,46.37,61.99,35.57,30.17,23.3,16.16,0.62,,0.98,,8.93,,7.57,72.25,75.25
4,101,Canada,2019,67.08,56.8,47.17,11.86,18.16,7.44,5.4,,0.71,,21.18,,12.24,79.9,69.21


In [16]:
Behavior_and_Attitudes=Behavior_and_Attitudes.rename(columns={"economy":"country"})
Behavior_and_Attitudes.head()


Unnamed: 0,code,country,year,Perceived_opportunities,Perceived_capabilities,Fear_of_failure_rate_*,Entrepreneurial_intentions,Total_early-stage_Entrepreneurial_Activity_(TEA),Established_Business_Ownership,Entrepreneurial_Employee_Activity,Motivational_Index,Female/Male_TEA,Female/Male_Opportunity-Driven_TEA,High_Job_Creation_Expectation,Innovation,Business_Services_Sector,High_Status_to_Successful_Entrepreneurs,Entrepreneurship_as_a_Good_Career_Choice
0,374,Armenia,2019,53.89,70.0,48.22,32.2,20.97,7.84,0.59,,0.64,,30.55,,7.55,73.41,87.24
1,61,Australia,2019,45.74,56.04,47.37,13.02,10.52,6.53,8.28,,0.72,,24.64,,26.31,74.0,56.39
2,375,Belarus,2019,29.52,42.34,37.96,6.59,5.78,2.72,0.49,,0.82,,28.23,,10.16,69.9,70.3
3,55,Brazil,2019,46.37,61.99,35.57,30.17,23.3,16.16,0.62,,0.98,,8.93,,7.57,72.25,75.25
4,101,Canada,2019,67.08,56.8,47.17,11.86,18.16,7.44,5.4,,0.71,,21.18,,12.24,79.9,69.21


In [17]:
Behavior_and_Attitudes=Behavior_and_Attitudes.rename(columns={"Fear_of_failure_rate_*":"Fear_of_failure_rate"})
Behavior_and_Attitudes.head()

Unnamed: 0,code,country,year,Perceived_opportunities,Perceived_capabilities,Fear_of_failure_rate,Entrepreneurial_intentions,Total_early-stage_Entrepreneurial_Activity_(TEA),Established_Business_Ownership,Entrepreneurial_Employee_Activity,Motivational_Index,Female/Male_TEA,Female/Male_Opportunity-Driven_TEA,High_Job_Creation_Expectation,Innovation,Business_Services_Sector,High_Status_to_Successful_Entrepreneurs,Entrepreneurship_as_a_Good_Career_Choice
0,374,Armenia,2019,53.89,70.0,48.22,32.2,20.97,7.84,0.59,,0.64,,30.55,,7.55,73.41,87.24
1,61,Australia,2019,45.74,56.04,47.37,13.02,10.52,6.53,8.28,,0.72,,24.64,,26.31,74.0,56.39
2,375,Belarus,2019,29.52,42.34,37.96,6.59,5.78,2.72,0.49,,0.82,,28.23,,10.16,69.9,70.3
3,55,Brazil,2019,46.37,61.99,35.57,30.17,23.3,16.16,0.62,,0.98,,8.93,,7.57,72.25,75.25
4,101,Canada,2019,67.08,56.8,47.17,11.86,18.16,7.44,5.4,,0.71,,21.18,,12.24,79.9,69.21


In [18]:
Behavior_and_Attitudes=Behavior_and_Attitudes.rename(columns={"Total_early-stage_Entrepreneurial_Activity_(TEA)":"Total_early_stage_Entrepreneurial_Activity"})
Behavior_and_Attitudes=Behavior_and_Attitudes.rename(columns={"Female/Male_TEA":"Female_Male_TEA"})
Behavior_and_Attitudes=Behavior_and_Attitudes.rename(columns={"Female/Male_Opportunity-Driven_TEA":"Female_Male_Opportunity_Driven_TEA"})
Behavior_and_Attitudes.head()

Unnamed: 0,code,country,year,Perceived_opportunities,Perceived_capabilities,Fear_of_failure_rate,Entrepreneurial_intentions,Total_early_stage_Entrepreneurial_Activity,Established_Business_Ownership,Entrepreneurial_Employee_Activity,Motivational_Index,Female_Male_TEA,Female_Male_Opportunity_Driven_TEA,High_Job_Creation_Expectation,Innovation,Business_Services_Sector,High_Status_to_Successful_Entrepreneurs,Entrepreneurship_as_a_Good_Career_Choice
0,374,Armenia,2019,53.89,70.0,48.22,32.2,20.97,7.84,0.59,,0.64,,30.55,,7.55,73.41,87.24
1,61,Australia,2019,45.74,56.04,47.37,13.02,10.52,6.53,8.28,,0.72,,24.64,,26.31,74.0,56.39
2,375,Belarus,2019,29.52,42.34,37.96,6.59,5.78,2.72,0.49,,0.82,,28.23,,10.16,69.9,70.3
3,55,Brazil,2019,46.37,61.99,35.57,30.17,23.3,16.16,0.62,,0.98,,8.93,,7.57,72.25,75.25
4,101,Canada,2019,67.08,56.8,47.17,11.86,18.16,7.44,5.4,,0.71,,21.18,,12.24,79.9,69.21


## Framework Conditions

In [19]:
df_data.isna()

Unnamed: 0,code,economy,year,Financing for entrepreneurs,Governmental support and policies,Taxes and bureaucracy,Governmental programs,Basic school entrepreneurial education and training,Post school entrepreneurial education and training,R&D transfer,Commercial and professional infrastructure,Internal market dynamics,Internal market openness,Physical and services infrastructure,Cultural and social norms
0,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False
1,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False
2,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False
3,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False
4,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
918,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False
919,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False
920,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False
921,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False


In [20]:
df_data.isnull().values.any()

True

In [21]:
df_data.isnull().sum()

code                                                    0
economy                                                 0
year                                                    0
Financing for entrepreneurs                             1
Governmental support and policies                       1
Taxes and bureaucracy                                   1
Governmental programs                                   1
Basic school entrepreneurial education and training     1
Post school entrepreneurial education and training     23
R&D transfer                                            1
Commercial and professional infrastructure              1
Internal market dynamics                                1
Internal market openness                                1
Physical and services infrastructure                    1
Cultural and social norms                              19
dtype: int64

In [22]:
df_data

Unnamed: 0,code,economy,year,Financing for entrepreneurs,Governmental support and policies,Taxes and bureaucracy,Governmental programs,Basic school entrepreneurial education and training,Post school entrepreneurial education and training,R&D transfer,Commercial and professional infrastructure,Internal market dynamics,Internal market openness,Physical and services infrastructure,Cultural and social norms
0,213,Algeria,2011,2.91,3.16,2.63,3.11,2.22,2.67,2.73,3.12,3.40,2.59,3.18,2.99
1,213,Algeria,2012,3.24,3.29,2.80,3.07,2.19,3.32,2.81,2.83,3.75,3.04,3.33,3.17
2,213,Algeria,2013,3.42,3.19,2.56,2.75,2.45,3.16,2.88,2.86,4.00,2.97,3.47,3.19
3,244,Angola,2010,2.11,2.31,2.16,1.97,1.81,2.41,1.72,2.39,2.76,2.00,2.28,2.84
4,244,Angola,2012,2.74,2.74,2.19,2.16,1.93,2.24,1.71,2.65,2.94,2.15,2.28,2.94
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
918,84,Vietnam,2015,2.12,2.62,2.78,2.14,1.57,2.53,2.33,2.77,3.59,2.51,4.07,3.23
919,84,Vietnam,2017,2.27,1.90,2.40,2.09,1.83,2.61,2.19,2.82,4.15,2.79,4.19,3.62
920,260,Zambia,2010,2.16,2.64,2.64,2.68,2.28,2.99,1.94,3.04,3.08,3.07,3.03,2.52
921,260,Zambia,2012,2.14,2.50,2.64,2.21,1.93,2.46,1.85,3.07,3.11,2.90,3.11,2.59


In [23]:
df_data.dtypes

code                                                     int64
economy                                                 object
year                                                     int64
Financing for entrepreneurs                            float64
Governmental support and policies                      float64
Taxes and bureaucracy                                  float64
Governmental programs                                  float64
Basic school entrepreneurial education and training    float64
Post school entrepreneurial education and training     float64
R&D transfer                                           float64
Commercial and professional infrastructure             float64
Internal market dynamics                               float64
Internal market openness                               float64
Physical and services infrastructure                   float64
Cultural and social norms                              float64
dtype: object

In [24]:
df_data = df_data.set_index(["code"])
df_data

Unnamed: 0_level_0,economy,year,Financing for entrepreneurs,Governmental support and policies,Taxes and bureaucracy,Governmental programs,Basic school entrepreneurial education and training,Post school entrepreneurial education and training,R&D transfer,Commercial and professional infrastructure,Internal market dynamics,Internal market openness,Physical and services infrastructure,Cultural and social norms
code,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
213,Algeria,2011,2.91,3.16,2.63,3.11,2.22,2.67,2.73,3.12,3.40,2.59,3.18,2.99
213,Algeria,2012,3.24,3.29,2.80,3.07,2.19,3.32,2.81,2.83,3.75,3.04,3.33,3.17
213,Algeria,2013,3.42,3.19,2.56,2.75,2.45,3.16,2.88,2.86,4.00,2.97,3.47,3.19
244,Angola,2010,2.11,2.31,2.16,1.97,1.81,2.41,1.72,2.39,2.76,2.00,2.28,2.84
244,Angola,2012,2.74,2.74,2.19,2.16,1.93,2.24,1.71,2.65,2.94,2.15,2.28,2.94
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
84,Vietnam,2015,2.12,2.62,2.78,2.14,1.57,2.53,2.33,2.77,3.59,2.51,4.07,3.23
84,Vietnam,2017,2.27,1.90,2.40,2.09,1.83,2.61,2.19,2.82,4.15,2.79,4.19,3.62
260,Zambia,2010,2.16,2.64,2.64,2.68,2.28,2.99,1.94,3.04,3.08,3.07,3.03,2.52
260,Zambia,2012,2.14,2.50,2.64,2.21,1.93,2.46,1.85,3.07,3.11,2.90,3.11,2.59


In [25]:
df_data = df_data.dropna()

In [26]:
df_data.columns = df_data.columns.str.replace(' ','_').str.lower()

df_data

Unnamed: 0_level_0,economy,year,financing_for_entrepreneurs,governmental_support_and_policies,taxes_and_bureaucracy,governmental_programs,basic_school_entrepreneurial_education_and_training,post_school_entrepreneurial_education_and_training,r&d_transfer,commercial_and_professional_infrastructure,internal_market_dynamics,internal_market_openness,physical_and_services_infrastructure,cultural_and_social_norms
code,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
213,Algeria,2011,2.91,3.16,2.63,3.11,2.22,2.67,2.73,3.12,3.40,2.59,3.18,2.99
213,Algeria,2012,3.24,3.29,2.80,3.07,2.19,3.32,2.81,2.83,3.75,3.04,3.33,3.17
213,Algeria,2013,3.42,3.19,2.56,2.75,2.45,3.16,2.88,2.86,4.00,2.97,3.47,3.19
244,Angola,2010,2.11,2.31,2.16,1.97,1.81,2.41,1.72,2.39,2.76,2.00,2.28,2.84
244,Angola,2012,2.74,2.74,2.19,2.16,1.93,2.24,1.71,2.65,2.94,2.15,2.28,2.94
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
84,Vietnam,2015,2.12,2.62,2.78,2.14,1.57,2.53,2.33,2.77,3.59,2.51,4.07,3.23
84,Vietnam,2017,2.27,1.90,2.40,2.09,1.83,2.61,2.19,2.82,4.15,2.79,4.19,3.62
260,Zambia,2010,2.16,2.64,2.64,2.68,2.28,2.99,1.94,3.04,3.08,3.07,3.03,2.52
260,Zambia,2012,2.14,2.50,2.64,2.21,1.93,2.46,1.85,3.07,3.11,2.90,3.11,2.59


In [27]:
df_data = df_data.rename(columns=
                         {"economy":"country",
                         "r&d_transfer":"research_and_development"})
df_data

Unnamed: 0_level_0,country,year,financing_for_entrepreneurs,governmental_support_and_policies,taxes_and_bureaucracy,governmental_programs,basic_school_entrepreneurial_education_and_training,post_school_entrepreneurial_education_and_training,research_and_development,commercial_and_professional_infrastructure,internal_market_dynamics,internal_market_openness,physical_and_services_infrastructure,cultural_and_social_norms
code,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
213,Algeria,2011,2.91,3.16,2.63,3.11,2.22,2.67,2.73,3.12,3.40,2.59,3.18,2.99
213,Algeria,2012,3.24,3.29,2.80,3.07,2.19,3.32,2.81,2.83,3.75,3.04,3.33,3.17
213,Algeria,2013,3.42,3.19,2.56,2.75,2.45,3.16,2.88,2.86,4.00,2.97,3.47,3.19
244,Angola,2010,2.11,2.31,2.16,1.97,1.81,2.41,1.72,2.39,2.76,2.00,2.28,2.84
244,Angola,2012,2.74,2.74,2.19,2.16,1.93,2.24,1.71,2.65,2.94,2.15,2.28,2.94
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
84,Vietnam,2015,2.12,2.62,2.78,2.14,1.57,2.53,2.33,2.77,3.59,2.51,4.07,3.23
84,Vietnam,2017,2.27,1.90,2.40,2.09,1.83,2.61,2.19,2.82,4.15,2.79,4.19,3.62
260,Zambia,2010,2.16,2.64,2.64,2.68,2.28,2.99,1.94,3.04,3.08,3.07,3.03,2.52
260,Zambia,2012,2.14,2.50,2.64,2.21,1.93,2.46,1.85,3.07,3.11,2.90,3.11,2.59


## Creating Database

In [28]:
conn = psycopg2.connect(
   database="postgres", user="postgres", password=f"{password}", host='127.0.0.1', port= '5432'
)

conn.autocommit = True
cursor = conn.cursor()
cursor.execute("SELECT datname FROM pg_database;")
list_database = cursor.fetchall()
dbname = "gem_db"
# try:
if (dbname,) not in list_database:
    cur = conn.cursor()
    cur.execute('CREATE DATABASE ' + dbname)
    cur.close()
    conn.close()
    print("Creating Database...")
    engine = create_engine(f"postgresql://postgres:{password}@localhost:5432/{dbname}")
    connection = engine.connect()
    print('-'*30)
    print("Creating Tables, Please wait...")
    print('-'*30)
    Behavior_and_Attitudes.to_sql("behavior_and_attitudes",engine)
    print("Table Behavior_and_Attitudes created successfully")
    df_data.to_sql("framework_conditions", engine)
    print("Table framework_conditions created successfully")
    connection.close()
    print('-'*30)
    print("Database is ready to use.")
else:
    print("Database is already exists.")
# except:
#     print("Something went wrong.")

Creating Database...
------------------------------
Creating Tables, Please wait...
------------------------------
Table Behavior_and_Attitudes created successfully
Table framework_conditions created successfully
------------------------------
Database is ready to use.


### Getting only the recent data for geoJSON conversion

In [30]:
data_2019=Behavior_and_Attitudes.loc[Behavior_and_Attitudes["year"]==2019]

In [31]:
data_2019.count()

code                                          50
country                                       50
year                                          50
Perceived_opportunities                       50
Perceived_capabilities                        50
Fear_of_failure_rate                          50
Entrepreneurial_intentions                    50
Total_early_stage_Entrepreneurial_Activity    50
Established_Business_Ownership                50
Entrepreneurial_Employee_Activity             50
Motivational_Index                             0
Female_Male_TEA                               50
Female_Male_Opportunity_Driven_TEA             0
High_Job_Creation_Expectation                 50
Innovation                                     0
Business_Services_Sector                      50
High_Status_to_Successful_Entrepreneurs       50
Entrepreneurship_as_a_Good_Career_Choice      50
dtype: int64

In [32]:
#deleting null values
data_2019=data_2019.dropna( axis=1,how='any')

In [33]:
data_2019.count()

code                                          50
country                                       50
year                                          50
Perceived_opportunities                       50
Perceived_capabilities                        50
Fear_of_failure_rate                          50
Entrepreneurial_intentions                    50
Total_early_stage_Entrepreneurial_Activity    50
Established_Business_Ownership                50
Entrepreneurial_Employee_Activity             50
Female_Male_TEA                               50
High_Job_Creation_Expectation                 50
Business_Services_Sector                      50
High_Status_to_Successful_Entrepreneurs       50
Entrepreneurship_as_a_Good_Career_Choice      50
dtype: int64

In [34]:
data_2019

Unnamed: 0,code,country,year,Perceived_opportunities,Perceived_capabilities,Fear_of_failure_rate,Entrepreneurial_intentions,Total_early_stage_Entrepreneurial_Activity,Established_Business_Ownership,Entrepreneurial_Employee_Activity,Female_Male_TEA,High_Job_Creation_Expectation,Business_Services_Sector,High_Status_to_Successful_Entrepreneurs,Entrepreneurship_as_a_Good_Career_Choice
0,374,Armenia,2019,53.89,70.0,48.22,32.2,20.97,7.84,0.59,0.64,30.55,7.55,73.41,87.24
1,61,Australia,2019,45.74,56.04,47.37,13.02,10.52,6.53,8.28,0.72,24.64,26.31,74.0,56.39
2,375,Belarus,2019,29.52,42.34,37.96,6.59,5.78,2.72,0.49,0.82,28.23,10.16,69.9,70.3
3,55,Brazil,2019,46.37,61.99,35.57,30.17,23.3,16.16,0.62,0.98,8.93,7.57,72.25,75.25
4,101,Canada,2019,67.08,56.8,47.17,11.86,18.16,7.44,5.4,0.71,21.18,12.24,79.9,69.21
5,56,Chile,2019,47.62,75.54,58.07,57.57,36.71,10.61,3.64,0.79,36.34,19.9,74.58,74.36
6,86,China,2019,74.86,67.35,44.65,21.42,8.66,9.33,0.16,0.84,19.52,10.62,92.42,79.32
7,57,Colombia,2019,46.68,72.37,32.69,35.47,22.28,4.27,0.91,0.88,35.53,13.17,65.42,63.82
8,385,Croatia,2019,55.66,71.19,50.66,20.57,10.47,3.57,5.93,0.61,25.12,31.22,46.45,61.49
9,357,Cyprus,2019,38.52,58.16,36.4,21.2,12.17,10.1,6.21,0.57,22.01,22.71,76.69,72.96


In [35]:
data_2019.to_csv('Resources/behavior_and_attitudes_2019.csv',index=False)

### Code testing for app.py files

In [39]:

engine = create_engine(f'postgresql://postgres:{password}@localhost:5432/gem_db')
connection = engine.connect()

data = pd.read_sql("SELECT * FROM framework_conditions WHERE country= 'United States'",connection)
data = data.set_index(["year"])
print(data)

data = pd.DataFrame.from_dict(data, orient='columns')
print(data.to_json())
# data.values.to_list()

      code        country  financing_for_entrepreneurs  \
year                                                     
2011     1  United States                         3.80   
2001     1  United States                         4.30   
2002     1  United States                         4.10   
2003     1  United States                         3.57   
2004     1  United States                         3.88   
2005     1  United States                         4.08   
2006     1  United States                         3.91   
2007     1  United States                         3.77   
2008     1  United States                         3.01   
2009     1  United States                         2.72   
2010     1  United States                         2.24   
2012     1  United States                         2.97   
2013     1  United States                         2.62   
2014     1  United States                         2.99   
2015     1  United States                         3.22   
2016     1  Un

In [41]:


data = pd.read_sql("SELECT year,financing_for_entrepreneurs FROM framework_conditions WHERE country= 'United States'",connection)
# data = data.set_index(["financing_for_entrepreneurs"])
# data = data.set_index(["year"])
# print(data)
print(data)
x = []
y = []
for index, row in data.iterrows():
    x.append(row["year"])
    y.append(row["financing_for_entrepreneurs"])
f = {}
f["year"]=x
f["test"]=y
#data = pd.DataFrame.from_dict(data, orient='columns')
#data1 = pd.DataFrame(data=x, dtype=int, columns=["year"])
#data1["test"]=y
#data1=data1.set_index("year")
# print(data1.to_json())
#data1 = data1.append(y)
#print(data1)
print(json.dumps(f))
# print(data.to_json())
# json1 = data.to_json()
# print(json1)
# data1.to_json()

    year  financing_for_entrepreneurs
0   2011                         3.80
1   2001                         4.30
2   2002                         4.10
3   2003                         3.57
4   2004                         3.88
5   2005                         4.08
6   2006                         3.91
7   2007                         3.77
8   2008                         3.01
9   2009                         2.72
10  2010                         2.24
11  2012                         2.97
12  2013                         2.62
13  2014                         2.99
14  2015                         3.22
15  2016                         3.11
16  2017                         2.97
17  2018                         3.57
18  2019                         3.00
{"year": [2011.0, 2001.0, 2002.0, 2003.0, 2004.0, 2005.0, 2006.0, 2007.0, 2008.0, 2009.0, 2010.0, 2012.0, 2013.0, 2014.0, 2015.0, 2016.0, 2017.0, 2018.0, 2019.0], "test": [3.8, 4.3, 4.1, 3.57, 3.88, 4.08, 3.91, 3.77, 3.01, 2.72, 2.24, 2.97,

In [42]:
df_data.columns


Index(['country', 'year', 'financing_for_entrepreneurs',
       'governmental_support_and_policies', 'taxes_and_bureaucracy',
       'governmental_programs',
       'basic_school_entrepreneurial_education_and_training',
       'post_school_entrepreneurial_education_and_training',
       'research_and_development',
       'commercial_and_professional_infrastructure',
       'internal_market_dynamics', 'internal_market_openness',
       'physical_and_services_infrastructure', 'cultural_and_social_norms'],
      dtype='object')

In [43]:
countryData = pd.read_sql("SELECT * FROM framework_conditions WHERE country= 'United States' AND year=2019",connection)

labels=['country', 'financing_for_entrepreneurs','governmental_support_and_policies', 'taxes_and_bureaucracy',
   'governmental_programs','basic_school_entrepreneurial_education_and_training', 'post_school_entrepreneurial_education_and_training', 'research_and_development',
   'commercial_and_professional_infrastructure','internal_market_dynamics', 'internal_market_openness','physical_and_services_infrastructure', 'cultural_and_social_norms']
dataPoints=[]

for item in labels:
    dataPoints.append(countryData[item][0])

c = {}
c["labels"]=labels
c["dataPoints"]=dataPoints
print(countryData)
print(dataPoints)
print(c)
print(json.dumps(c))

   code        country  year  financing_for_entrepreneurs  \
0     1  United States  2019                          3.0   

   governmental_support_and_policies  taxes_and_bureaucracy  \
0                                3.5                   3.02   

   governmental_programs  basic_school_entrepreneurial_education_and_training  \
0                    2.2                                                2.1     

   post_school_entrepreneurial_education_and_training  \
0                                                3.0    

   research_and_development  commercial_and_professional_infrastructure  \
0                      2.02                                         4.0   

   internal_market_dynamics  internal_market_openness  \
0                      2.94                       2.0   

   physical_and_services_infrastructure  cultural_and_social_norms  
0                                  4.13                        4.5  
['United States', 3.0, 3.5, 3.02, 2.2, 2.1, 3.0, 2.02, 4.0, 2.94, 2.

In [44]:
indicators={
        'Financing for Entrepreneurs':'financing_for_entrepreneurs',
        'Governmental Support and Policies':'governmental_support_and_policies', 
        'Taxes and Bureaucracy':'taxes_and_bureaucracy',
        'Governmental Programs':'governmental_programs',
        'Basic School Entrepreneurial Education and Training':'basic_school_entrepreneurial_education_and_training', 
        'Post School Entrepreneurial Education and Training':'post_school_entrepreneurial_education_and_training', 
        'Research and Development':'research_and_development',
        'Commercial and Professional Infrastructure':'commercial_and_professional_infrastructure',
        'Internal Market Dynamics':'internal_market_dynamics', 
        'Internal Market Openness':'internal_market_openness',
        'Physical and Services Infrastructure':'physical_and_services_infrastructure', 
        'Cultural and Social Norms':'cultural_and_social_norms'}

keys = []
values=[]

for key in indicators.keys():
    keys.append(key)

    
for value in indicators.values():
    values.append(value)
    
data_dict = {}

# data_dict["country"] = country_list
data_dict["indicator_keys"] = keys
data_dict["indicator_values"] = values
print(keys)
print(data_dict)

['Financing for Entrepreneurs', 'Governmental Support and Policies', 'Taxes and Bureaucracy', 'Governmental Programs', 'Basic School Entrepreneurial Education and Training', 'Post School Entrepreneurial Education and Training', 'Research and Development', 'Commercial and Professional Infrastructure', 'Internal Market Dynamics', 'Internal Market Openness', 'Physical and Services Infrastructure', 'Cultural and Social Norms']
{'indicator_keys': ['Financing for Entrepreneurs', 'Governmental Support and Policies', 'Taxes and Bureaucracy', 'Governmental Programs', 'Basic School Entrepreneurial Education and Training', 'Post School Entrepreneurial Education and Training', 'Research and Development', 'Commercial and Professional Infrastructure', 'Internal Market Dynamics', 'Internal Market Openness', 'Physical and Services Infrastructure', 'Cultural and Social Norms'], 'indicator_values': ['financing_for_entrepreneurs', 'governmental_support_and_policies', 'taxes_and_bureaucracy', 'governmenta