# More Data Cleaning

## Interest Rate - US Data

In [27]:
import pandas as pd
from sklearn.linear_model import LogisticRegression, LinearRegression
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
from sklearn.model_selection import train_test_split

In [28]:
# Load the US interest-rate CSV (columns DATE and INTDSRUSM193N) into a DataFrame.
interest_rate_us = pd.read_csv('../Resources/Datasets/Additional_factors/INTDSRUSM193N.csv')
# Show the raw frame so its contents can be checked before cleaning.
interest_rate_us

Unnamed: 0,DATE,INTDSRUSM193N
0,2001-01-01,5.52
1,2001-02-01,5.00
2,2001-03-01,4.81
3,2001-04-01,4.28
4,2001-05-01,3.73
...,...,...
243,2021-04-01,0.25
244,2021-05-01,0.25
245,2021-06-01,0.25
246,2021-07-01,0.25


In [29]:
# Inspect the column labels of the raw interest-rate frame.
interest_rate_us.columns

Index(['DATE', 'INTDSRUSM193N'], dtype='object')

In [30]:
# Check the column dtypes; DATE is loaded as a generic object column, so it
# still has to be parsed into datetimes.
interest_rate_us.dtypes

DATE              object
INTDSRUSM193N    float64
dtype: object

In [31]:
# Parse the raw DATE strings into a datetime column called "Date".
interest_rate_us = interest_rate_us.assign(
    Date=pd.to_datetime(interest_rate_us["DATE"])
)
interest_rate_us.head()

Unnamed: 0,DATE,INTDSRUSM193N,Date
0,2001-01-01,5.52,2001-01-01
1,2001-02-01,5.0,2001-02-01
2,2001-03-01,4.81,2001-03-01
3,2001-04-01,4.28,2001-04-01
4,2001-05-01,3.73,2001-05-01


In [32]:
# The parsed "Date" column replaces the raw string column, so discard DATE.
interest_rate_us = interest_rate_us.drop(columns="DATE")
interest_rate_us

Unnamed: 0,INTDSRUSM193N,Date
0,5.52,2001-01-01
1,5.00,2001-02-01
2,4.81,2001-03-01
3,4.28,2001-04-01
4,3.73,2001-05-01
...,...,...
243,0.25,2021-04-01
244,0.25,2021-05-01
245,0.25,2021-06-01
246,0.25,2021-07-01


In [33]:
# Keep only the rate and the parsed date, in an explicit column order.
# `columns` is given as a list rather than a set: a set has no defined order
# (so the column layout could differ between runs) and recent pandas versions
# refuse a set for this argument.
interest_rate_us = pd.DataFrame(data=interest_rate_us, columns=["INTDSRUSM193N", "Date"])
interest_rate_us

Unnamed: 0,INTDSRUSM193N,Date
0,5.52,2001-01-01
1,5.00,2001-02-01
2,4.81,2001-03-01
3,4.28,2001-04-01
4,3.73,2001-05-01
...,...,...
243,0.25,2021-04-01
244,0.25,2021-05-01
245,0.25,2021-06-01
246,0.25,2021-07-01


In [34]:
# Derive calendar Year and Month columns from the parsed timestamp.
interest_rate_us = interest_rate_us.assign(
    Year=lambda frame: frame['Date'].dt.year,
    Month=lambda frame: frame['Date'].dt.month,
)
interest_rate_us

Unnamed: 0,INTDSRUSM193N,Date,Year,Month
0,5.52,2001-01-01,2001,1
1,5.00,2001-02-01,2001,2
2,4.81,2001-03-01,2001,3
3,4.28,2001-04-01,2001,4
4,3.73,2001-05-01,2001,5
...,...,...,...,...
243,0.25,2021-04-01,2021,4
244,0.25,2021-05-01,2021,5
245,0.25,2021-06-01,2021,6
246,0.25,2021-07-01,2021,7


In [35]:
# Year and Month now carry the time information, so the timestamp can go.
interest_rate_us = interest_rate_us.drop(columns="Date")
interest_rate_us

Unnamed: 0,INTDSRUSM193N,Year,Month
0,5.52,2001,1
1,5.00,2001,2
2,4.81,2001,3
3,4.28,2001,4
4,3.73,2001,5
...,...,...,...
243,0.25,2021,4
244,0.25,2021,5
245,0.25,2021,6
246,0.25,2021,7


In [36]:
# Put the time columns first and the measured value last.
interest_rate_us = interest_rate_us.reindex(
    columns=['Year', 'Month', 'INTDSRUSM193N']
)
interest_rate_us

Unnamed: 0,Year,Month,INTDSRUSM193N
0,2001,1,5.52
1,2001,2,5.00
2,2001,3,4.81
3,2001,4,4.28
4,2001,5,3.73
...,...,...,...
243,2021,4,0.25
244,2021,5,0.25
245,2021,6,0.25
246,2021,7,0.25


In [37]:
# Give the series-code column a readable name; Year and Month keep their names.
interest_rate_us = interest_rate_us.rename(
    columns={"INTDSRUSM193N": "Interest_Rate(%)"}
)
interest_rate_us

Unnamed: 0,Year,Month,Interest_Rate(%)
0,2001,1,5.52
1,2001,2,5.00
2,2001,3,4.81
3,2001,4,4.28
4,2001,5,3.73
...,...,...,...
243,2021,4,0.25
244,2021,5,0.25
245,2021,6,0.25
246,2021,7,0.25


In [38]:
# Summary statistics (count, mean, std, quartiles) for every numeric column.
interest_rate_us.describe()

Unnamed: 0,Year,Month,Interest_Rate(%)
count,248.0,248.0,248.0
mean,2010.83871,6.435484,1.979839
std,5.980247,3.44592,1.709814
min,2001.0,1.0,0.25
25%,2006.0,3.0,0.75
50%,2011.0,6.0,1.25
75%,2016.0,9.0,2.75
max,2021.0,12.0,6.25


In [39]:
# Collapse the monthly rows to one row per year by averaging.
# Month gets averaged too at this point; it is removed in a later cell.
interest_rate_yearly = interest_rate_us.groupby('Year').mean()
interest_rate_yearly

Unnamed: 0_level_0,Month,Interest_Rate(%)
Year,Unnamed: 1_level_1,Unnamed: 2_level_1
2001,6.5,3.41
2002,6.5,1.173333
2003,6.5,2.104167
2004,6.5,2.395833
2005,6.5,4.25
2006,6.5,6.020833
2007,6.5,5.791667
2008,6.5,2.166667
2009,6.5,0.5
2010,6.5,0.729167


In [40]:
# Move Year out of the index and back into an ordinary column so it can be
# used as a merge key later.
interest_rate_yearly = interest_rate_yearly.reset_index()
interest_rate_yearly

Unnamed: 0,Year,Month,Interest_Rate(%)
0,2001,6.5,3.41
1,2002,6.5,1.173333
2,2003,6.5,2.104167
3,2004,6.5,2.395833
4,2005,6.5,4.25
5,2006,6.5,6.020833
6,2007,6.5,5.791667
7,2008,6.5,2.166667
8,2009,6.5,0.5
9,2010,6.5,0.729167


In [41]:
# The averaged Month column carries no information at yearly granularity.
interest_rate_yearly = interest_rate_yearly.drop('Month', axis=1)
interest_rate_yearly

Unnamed: 0,Year,Interest_Rate(%)
0,2001,3.41
1,2002,1.173333
2,2003,2.104167
3,2004,2.395833
4,2005,4.25
5,2006,6.020833
6,2007,5.791667
7,2008,2.166667
8,2009,0.5
9,2010,0.729167


## Working population data

In [42]:
# Load the working-population CSV (columns DATE and LFWA64TTUSM647S) into a DataFrame.
working_pop = pd.read_csv('../Resources/Datasets/Additional_factors/LFWA64TTUSM647S.csv')
# Show the raw frame so its contents can be checked before cleaning.
working_pop

Unnamed: 0,DATE,LFWA64TTUSM647S
0,1977-01-01,1.351501e+08
1,1977-02-01,1.353560e+08
2,1977-03-01,1.355741e+08
3,1977-04-01,1.358378e+08
4,1977-05-01,1.361373e+08
...,...,...
539,2021-12-01,2.051615e+08
540,2022-01-01,2.071452e+08
541,2022-02-01,2.071243e+08
542,2022-03-01,2.069504e+08


In [43]:
# Parse the raw DATE strings into a datetime column called "Date".
working_pop = working_pop.assign(
    Date=pd.to_datetime(working_pop["DATE"])
)
working_pop.head()

Unnamed: 0,DATE,LFWA64TTUSM647S,Date
0,1977-01-01,135150100.0,1977-01-01
1,1977-02-01,135356000.0,1977-02-01
2,1977-03-01,135574100.0,1977-03-01
3,1977-04-01,135837800.0,1977-04-01
4,1977-05-01,136137300.0,1977-05-01


In [44]:
# The parsed "Date" column replaces the raw string column, so discard DATE.
working_pop = working_pop.drop(columns="DATE")
working_pop

Unnamed: 0,LFWA64TTUSM647S,Date
0,1.351501e+08,1977-01-01
1,1.353560e+08,1977-02-01
2,1.355741e+08,1977-03-01
3,1.358378e+08,1977-04-01
4,1.361373e+08,1977-05-01
...,...,...
539,2.051615e+08,2021-12-01
540,2.071452e+08,2022-01-01
541,2.071243e+08,2022-02-01
542,2.069504e+08,2022-03-01


In [45]:
# Keep only the population value and the parsed date, in an explicit order.
# `columns` is given as a list rather than a set: a set has no defined order
# (so the column layout could differ between runs) and recent pandas versions
# refuse a set for this argument.
working_pop = pd.DataFrame(data=working_pop, columns=["LFWA64TTUSM647S", "Date"])
working_pop

Unnamed: 0,LFWA64TTUSM647S,Date
0,1.351501e+08,1977-01-01
1,1.353560e+08,1977-02-01
2,1.355741e+08,1977-03-01
3,1.358378e+08,1977-04-01
4,1.361373e+08,1977-05-01
...,...,...
539,2.051615e+08,2021-12-01
540,2.071452e+08,2022-01-01
541,2.071243e+08,2022-02-01
542,2.069504e+08,2022-03-01


In [46]:
# Derive calendar Year and Month columns from the parsed timestamp.
working_pop = working_pop.assign(
    Year=lambda frame: frame['Date'].dt.year,
    Month=lambda frame: frame['Date'].dt.month,
)
working_pop

Unnamed: 0,LFWA64TTUSM647S,Date,Year,Month
0,1.351501e+08,1977-01-01,1977,1
1,1.353560e+08,1977-02-01,1977,2
2,1.355741e+08,1977-03-01,1977,3
3,1.358378e+08,1977-04-01,1977,4
4,1.361373e+08,1977-05-01,1977,5
...,...,...,...,...
539,2.051615e+08,2021-12-01,2021,12
540,2.071452e+08,2022-01-01,2022,1
541,2.071243e+08,2022-02-01,2022,2
542,2.069504e+08,2022-03-01,2022,3


In [47]:
# Year and Month now carry the time information, so the timestamp can go.
working_pop = working_pop.drop(columns="Date")
working_pop

Unnamed: 0,LFWA64TTUSM647S,Year,Month
0,1.351501e+08,1977,1
1,1.353560e+08,1977,2
2,1.355741e+08,1977,3
3,1.358378e+08,1977,4
4,1.361373e+08,1977,5
...,...,...,...
539,2.051615e+08,2021,12
540,2.071452e+08,2022,1
541,2.071243e+08,2022,2
542,2.069504e+08,2022,3


In [48]:
# Confirm which columns remain before reordering them.
working_pop.columns

Index(['LFWA64TTUSM647S', 'Year', 'Month'], dtype='object')

In [49]:
# Put the time columns first and the measured value last.
working_pop = working_pop.reindex(
    columns=['Year', 'Month', 'LFWA64TTUSM647S']
)
working_pop

Unnamed: 0,Year,Month,LFWA64TTUSM647S
0,1977,1,1.351501e+08
1,1977,2,1.353560e+08
2,1977,3,1.355741e+08
3,1977,4,1.358378e+08
4,1977,5,1.361373e+08
...,...,...,...
539,2021,12,2.051615e+08
540,2022,1,2.071452e+08
541,2022,2,2.071243e+08
542,2022,3,2.069504e+08


In [50]:
# Give the series-code column a readable name; Year and Month keep their names.
working_pop = working_pop.rename(
    columns={"LFWA64TTUSM647S": "working_population"}
)
working_pop

Unnamed: 0,Year,Month,working_population
0,1977,1,1.351501e+08
1,1977,2,1.353560e+08
2,1977,3,1.355741e+08
3,1977,4,1.358378e+08
4,1977,5,1.361373e+08
...,...,...,...
539,2021,12,2.051615e+08
540,2022,1,2.071452e+08
541,2022,2,2.071243e+08
542,2022,3,2.069504e+08


In [51]:
# Collapse the monthly rows to one row per year by averaging.
# Month gets averaged too at this point; it is removed in a later cell.
working_pop_yearly = working_pop.groupby('Year').mean()
working_pop_yearly

Unnamed: 0_level_0,Month,working_population
Year,Unnamed: 1_level_1,Unnamed: 2_level_1
1977,6.5,136378000.0
1978,6.5,138693000.0
1979,6.5,141069800.0
1980,6.5,143415800.0
1981,6.5,145283600.0
1982,6.5,146929400.0
1983,6.5,148281900.0
1984,6.5,149923500.0
1985,6.5,151206400.0
1986,6.5,153087200.0


In [52]:
# Move Year out of the index and back into an ordinary column so it can be
# used as a merge key later.
working_pop_yearly= working_pop_yearly.reset_index()
working_pop_yearly

Unnamed: 0,Year,Month,working_population
0,1977,6.5,136378000.0
1,1978,6.5,138693000.0
2,1979,6.5,141069800.0
3,1980,6.5,143415800.0
4,1981,6.5,145283600.0
5,1982,6.5,146929400.0
6,1983,6.5,148281900.0
7,1984,6.5,149923500.0
8,1985,6.5,151206400.0
9,1986,6.5,153087200.0


In [53]:
# The averaged Month column carries no information at yearly granularity.
working_pop_yearly = working_pop_yearly.drop('Month', axis=1)
working_pop_yearly

Unnamed: 0,Year,working_population
0,1977,136378000.0
1,1978,138693000.0
2,1979,141069800.0
3,1980,143415800.0
4,1981,145283600.0
5,1982,146929400.0
6,1983,148281900.0
7,1984,149923500.0
8,1985,151206400.0
9,1986,153087200.0


In [54]:
# Check dtypes: the yearly mean leaves working_population as float64.
working_pop_yearly.dtypes

Year                    int64
working_population    float64
dtype: object

In [55]:
# Cast every column of the frame to int64 so the population is stored as a
# whole number.
# NOTE(review): a float -> int64 cast truncates the fractional part rather than
# rounding; confirm that dropping the fraction of the yearly mean is acceptable.
working_pop_yearly=working_pop_yearly.astype('int64')
working_pop_yearly

Unnamed: 0,Year,working_population
0,1977,136377958
1,1978,138692990
2,1979,141069819
3,1980,143415797
4,1981,145283617
5,1982,146929406
6,1983,148281866
7,1984,149923547
8,1985,151206428
9,1986,153087163


## Stock market to GDP (%) data

In [56]:
# Load the stock-market-to-GDP CSV (columns DATE and DDDM01USA156NWDB) into a DataFrame.
stock_to_GDP_percent = pd.read_csv('../Resources/Datasets/Additional_factors/DDDM01USA156NWDB.csv')
# Show the raw frame so its contents can be checked before cleaning.
stock_to_GDP_percent

Unnamed: 0,DATE,DDDM01USA156NWDB
0,1975-01-01,41.77093
1,1976-01-01,47.13859
2,1977-01-01,40.07242
3,1978-01-01,36.65425
4,1979-01-01,37.81746
5,1980-01-01,47.59019
6,1981-01-01,39.39957
7,1982-01-01,43.56929
8,1983-01-01,49.78028
9,1984-01-01,39.68086


In [57]:
# Manually add a 2020 observation (194.490) to the series.
# NOTE(review): presumably the downloaded CSV stops before 2020 -- confirm
# where this value comes from.
# The date uses the same ISO layout as the rest of the DATE column so the
# later pd.to_datetime call sees one consistent format, and the existence
# check keeps the cell idempotent: re-running it does not append a second
# 2020 row (which would duplicate that year in the merges further down).
if not stock_to_GDP_percent["DATE"].eq("2020-01-01").any():
    stock_to_GDP_percent.loc[len(stock_to_GDP_percent.index)] = ["2020-01-01", 194.490]
stock_to_GDP_percent

Unnamed: 0,DATE,DDDM01USA156NWDB
0,1975-01-01,41.77093
1,1976-01-01,47.13859
2,1977-01-01,40.07242
3,1978-01-01,36.65425
4,1979-01-01,37.81746
5,1980-01-01,47.59019
6,1981-01-01,39.39957
7,1982-01-01,43.56929
8,1983-01-01,49.78028
9,1984-01-01,39.68086


In [58]:
# Parse the raw DATE strings into a datetime column called "Date".
stock_to_GDP_percent = stock_to_GDP_percent.assign(
    Date=pd.to_datetime(stock_to_GDP_percent["DATE"])
)
stock_to_GDP_percent.head()

Unnamed: 0,DATE,DDDM01USA156NWDB,Date
0,1975-01-01,41.77093,1975-01-01
1,1976-01-01,47.13859,1976-01-01
2,1977-01-01,40.07242,1977-01-01
3,1978-01-01,36.65425,1978-01-01
4,1979-01-01,37.81746,1979-01-01


In [59]:
# The parsed "Date" column replaces the raw string column, so discard DATE.
stock_to_GDP_percent = stock_to_GDP_percent.drop(columns="DATE")
stock_to_GDP_percent

Unnamed: 0,DDDM01USA156NWDB,Date
0,41.77093,1975-01-01
1,47.13859,1976-01-01
2,40.07242,1977-01-01
3,36.65425,1978-01-01
4,37.81746,1979-01-01
5,47.59019,1980-01-01
6,39.39957,1981-01-01
7,43.56929,1982-01-01
8,49.78028,1983-01-01
9,39.68086,1984-01-01


In [60]:
# Keep only the ratio and the parsed date, in an explicit column order.
# `columns` is given as a list rather than a set: a set has no defined order
# (so the column layout could differ between runs) and recent pandas versions
# refuse a set for this argument.
stock_to_GDP_percent = pd.DataFrame(data=stock_to_GDP_percent, columns=["DDDM01USA156NWDB", "Date"])
stock_to_GDP_percent

Unnamed: 0,DDDM01USA156NWDB,Date
0,41.77093,1975-01-01
1,47.13859,1976-01-01
2,40.07242,1977-01-01
3,36.65425,1978-01-01
4,37.81746,1979-01-01
5,47.59019,1980-01-01
6,39.39957,1981-01-01
7,43.56929,1982-01-01
8,49.78028,1983-01-01
9,39.68086,1984-01-01


In [61]:
# Derive calendar Year and Month columns from the parsed timestamp.
stock_to_GDP_percent = stock_to_GDP_percent.assign(
    Year=lambda frame: frame['Date'].dt.year,
    Month=lambda frame: frame['Date'].dt.month,
)
stock_to_GDP_percent

Unnamed: 0,DDDM01USA156NWDB,Date,Year,Month
0,41.77093,1975-01-01,1975,1
1,47.13859,1976-01-01,1976,1
2,40.07242,1977-01-01,1977,1
3,36.65425,1978-01-01,1978,1
4,37.81746,1979-01-01,1979,1
5,47.59019,1980-01-01,1980,1
6,39.39957,1981-01-01,1981,1
7,43.56929,1982-01-01,1982,1
8,49.78028,1983-01-01,1983,1
9,39.68086,1984-01-01,1984,1


In [62]:
# Year and Month now carry the time information, so the timestamp can go.
stock_to_GDP_percent = stock_to_GDP_percent.drop(columns="Date")
stock_to_GDP_percent

Unnamed: 0,DDDM01USA156NWDB,Year,Month
0,41.77093,1975,1
1,47.13859,1976,1
2,40.07242,1977,1
3,36.65425,1978,1
4,37.81746,1979,1
5,47.59019,1980,1
6,39.39957,1981,1
7,43.56929,1982,1
8,49.78028,1983,1
9,39.68086,1984,1


In [63]:
# Put the time columns first and the measured value last.
stock_to_GDP_percent = stock_to_GDP_percent.reindex(
    columns=['Year', 'Month', 'DDDM01USA156NWDB']
)
stock_to_GDP_percent

Unnamed: 0,Year,Month,DDDM01USA156NWDB
0,1975,1,41.77093
1,1976,1,47.13859
2,1977,1,40.07242
3,1978,1,36.65425
4,1979,1,37.81746
5,1980,1,47.59019
6,1981,1,39.39957
7,1982,1,43.56929
8,1983,1,49.78028
9,1984,1,39.68086


In [64]:
# Give the series-code column a readable name; Year and Month keep their names.
stock_to_GDP_percent = stock_to_GDP_percent.rename(
    columns={"DDDM01USA156NWDB": "stock_to_GDP(%)"}
)
stock_to_GDP_percent

Unnamed: 0,Year,Month,stock_to_GDP(%)
0,1975,1,41.77093
1,1976,1,47.13859
2,1977,1,40.07242
3,1978,1,36.65425
4,1979,1,37.81746
5,1980,1,47.59019
6,1981,1,39.39957
7,1982,1,43.56929
8,1983,1,49.78028
9,1984,1,39.68086


In [65]:
# The previews show one January row per year, so no aggregation is done here;
# only the Month column is dropped to get the yearly table.
stock_to_GDP_percent_yearly = stock_to_GDP_percent.drop('Month', axis=1)
stock_to_GDP_percent_yearly

Unnamed: 0,Year,stock_to_GDP(%)
0,1975,41.77093
1,1976,47.13859
2,1977,40.07242
3,1978,36.65425
4,1979,37.81746
5,1980,47.59019
6,1981,39.39957
7,1982,43.56929
8,1983,49.78028
9,1984,39.68086


## Inflation data

In [66]:
# Load the inflation CSV (columns DATE and FPCPITOTLZGUSA) into a DataFrame.
inflation = pd.read_csv('../Resources/Datasets/Additional_factors/FPCPITOTLZGUSA.csv')
# Show the raw frame so its contents can be checked before cleaning.
inflation

Unnamed: 0,DATE,FPCPITOTLZGUSA
0,1960-01-01,1.457976
1,1961-01-01,1.070724
2,1962-01-01,1.198773
3,1963-01-01,1.239669
4,1964-01-01,1.278912
...,...,...
57,2017-01-01,2.130110
58,2018-01-01,2.442583
59,2019-01-01,1.812210
60,2020-01-01,1.233584


In [67]:
# Parse the raw DATE strings into a datetime column called "Date".
inflation = inflation.assign(
    Date=pd.to_datetime(inflation["DATE"])
)
inflation.head()

Unnamed: 0,DATE,FPCPITOTLZGUSA,Date
0,1960-01-01,1.457976,1960-01-01
1,1961-01-01,1.070724,1961-01-01
2,1962-01-01,1.198773,1962-01-01
3,1963-01-01,1.239669,1963-01-01
4,1964-01-01,1.278912,1964-01-01


In [68]:
# The parsed "Date" column replaces the raw string column, so discard DATE.
inflation = inflation.drop(columns="DATE")
inflation

Unnamed: 0,FPCPITOTLZGUSA,Date
0,1.457976,1960-01-01
1,1.070724,1961-01-01
2,1.198773,1962-01-01
3,1.239669,1963-01-01
4,1.278912,1964-01-01
...,...,...
57,2.130110,2017-01-01
58,2.442583,2018-01-01
59,1.812210,2019-01-01
60,1.233584,2020-01-01


In [69]:
# Keep only the inflation value and the parsed date, in an explicit order.
# `columns` is given as a list rather than a set: a set has no defined order
# (so the column layout could differ between runs) and recent pandas versions
# refuse a set for this argument.
inflation = pd.DataFrame(data=inflation, columns=["FPCPITOTLZGUSA", "Date"])
inflation

Unnamed: 0,FPCPITOTLZGUSA,Date
0,1.457976,1960-01-01
1,1.070724,1961-01-01
2,1.198773,1962-01-01
3,1.239669,1963-01-01
4,1.278912,1964-01-01
...,...,...
57,2.130110,2017-01-01
58,2.442583,2018-01-01
59,1.812210,2019-01-01
60,1.233584,2020-01-01


In [70]:
# Derive calendar Year and Month columns from the parsed timestamp.
inflation = inflation.assign(
    Year=lambda frame: frame['Date'].dt.year,
    Month=lambda frame: frame['Date'].dt.month,
)
inflation

Unnamed: 0,FPCPITOTLZGUSA,Date,Year,Month
0,1.457976,1960-01-01,1960,1
1,1.070724,1961-01-01,1961,1
2,1.198773,1962-01-01,1962,1
3,1.239669,1963-01-01,1963,1
4,1.278912,1964-01-01,1964,1
...,...,...,...,...
57,2.130110,2017-01-01,2017,1
58,2.442583,2018-01-01,2018,1
59,1.812210,2019-01-01,2019,1
60,1.233584,2020-01-01,2020,1


In [71]:
# Year and Month now carry the time information, so the timestamp can go.
inflation = inflation.drop(columns="Date")
inflation

Unnamed: 0,FPCPITOTLZGUSA,Year,Month
0,1.457976,1960,1
1,1.070724,1961,1
2,1.198773,1962,1
3,1.239669,1963,1
4,1.278912,1964,1
...,...,...,...
57,2.130110,2017,1
58,2.442583,2018,1
59,1.812210,2019,1
60,1.233584,2020,1


In [72]:
# Put the time columns first and the measured value last.
inflation = inflation.reindex(
    columns=['Year', 'Month', 'FPCPITOTLZGUSA']
)
inflation

Unnamed: 0,Year,Month,FPCPITOTLZGUSA
0,1960,1,1.457976
1,1961,1,1.070724
2,1962,1,1.198773
3,1963,1,1.239669
4,1964,1,1.278912
...,...,...,...
57,2017,1,2.130110
58,2018,1,2.442583
59,2019,1,1.812210
60,2020,1,1.233584


In [73]:
# Give the series-code column a readable name; Year and Month keep their names.
inflation = inflation.rename(
    columns={"FPCPITOTLZGUSA": "inflation(%)"}
)
inflation

Unnamed: 0,Year,Month,inflation(%)
0,1960,1,1.457976
1,1961,1,1.070724
2,1962,1,1.198773
3,1963,1,1.239669
4,1964,1,1.278912
...,...,...,...
57,2017,1,2.130110
58,2018,1,2.442583
59,2019,1,1.812210
60,2020,1,1.233584


In [74]:
# The previews show one January row per year, so no aggregation is done here;
# only the Month column is dropped to get the yearly table.
inflation_yearly = inflation.drop('Month', axis=1)
inflation_yearly

Unnamed: 0,Year,inflation(%)
0,1960,1.457976
1,1961,1.070724
2,1962,1.198773
3,1963,1.239669
4,1964,1.278912
...,...,...
57,2017,2.130110
58,2018,2.442583
59,2019,1.812210
60,2020,1.233584


In [75]:
# Join inflation with the stock-to-GDP ratio on Year; the inner join keeps
# only the years present in both tables.
df_merged_1 = pd.merge(
    inflation_yearly,
    stock_to_GDP_percent_yearly,
    on='Year',
    how='inner',
)
df_merged_1

Unnamed: 0,Year,inflation(%),stock_to_GDP(%)
0,1975,9.143147,41.77093
1,1976,5.744813,47.13859
2,1977,6.501684,40.07242
3,1978,7.630964,36.65425
4,1979,11.254471,37.81746
5,1980,13.549202,47.59019
6,1981,10.334715,39.39957
7,1982,6.131427,43.56929
8,1983,3.212435,49.78028
9,1984,4.300535,39.68086


In [76]:
# Add the yearly working population, again keeping only years common to both
# sides.
df_merged_2 = pd.merge(
    df_merged_1,
    working_pop_yearly,
    on='Year',
    how='inner',
)
df_merged_2

Unnamed: 0,Year,inflation(%),stock_to_GDP(%),working_population
0,1977,6.501684,40.07242,136377958
1,1978,7.630964,36.65425,138692990
2,1979,11.254471,37.81746,141069819
3,1980,13.549202,47.59019,143415797
4,1981,10.334715,39.39957,145283617
5,1982,6.131427,43.56929,146929406
6,1983,3.212435,49.78028,148281866
7,1984,4.300535,39.68086,149923547
8,1985,3.545644,53.02763,151206428
9,1986,1.898048,55.41805,153087163


In [77]:
# Final join: attach the yearly interest rate. The inner join restricts the
# result to years covered by all four series.
df_merged = pd.merge(
    df_merged_2,
    interest_rate_yearly,
    on='Year',
    how='inner',
)
df_merged

Unnamed: 0,Year,inflation(%),stock_to_GDP(%),working_population,Interest_Rate(%)
0,2001,2.826171,132.148,181476647,3.41
1,2002,1.586032,101.0791,183792729,1.173333
2,2003,2.270095,124.5066,186939817,2.104167
3,2004,2.677237,133.6506,188763071,2.395833
4,2005,3.392747,130.4083,191024953,4.25
5,2006,3.225944,141.6542,193219398,6.020833
6,2007,2.852672,137.8527,195663562,5.791667
7,2008,3.8391,78.7766,196691536,2.166667
8,2009,-0.355546,104.3488,197897475,0.5
9,2010,1.640043,115.2841,199183839,0.729167


In [78]:
# Summary statistics for the merged table; a quick check of the row count and
# value ranges after the joins.
df_merged.describe()

Unnamed: 0,Year,inflation(%),stock_to_GDP(%),working_population,Interest_Rate(%)
count,20.0,20.0,20.0,20.0,20.0
mean,2010.5,2.063321,132.850835,197785000.0,2.0375
std,5.91608,1.057007,25.988146,7884941.0,1.68507
min,2001.0,-0.355546,78.7766,181476600.0,0.5
25%,2005.75,1.555732,115.27755,192670800.0,0.75
50%,2010.5,2.099724,135.54755,199573200.0,1.399167
75%,2015.25,2.832796,146.35355,204639000.0,2.53125
max,2020.0,3.8391,194.49,206538900.0,6.020833


In [55]:
# Export of the merged table is currently disabled: the line below is commented
# out, so running this cell writes nothing.
# NOTE(review): presumably this was run once to produce 4_more_factors.csv --
# confirm, and re-enable it if downstream notebooks rely on a fresh export.
#df_merged.to_csv('../Resources/Datasets/cleaned_data/4_more_factors.csv', index=False)
