In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.preprocessing import PolynomialFeatures

In [None]:
# Load the GDP and inflation history tables.
# The 'Unnamed: 0' column is dropped right after reading
# (presumably a saved index column from an earlier export — confirm).
df1 = pd.read_csv('data/3 - India_GDP_Historical_Data.csv').drop(columns='Unnamed: 0')
df2 = pd.read_csv('data/9 - India_Inflation_Rate_Historical_Data.csv').drop(columns='Unnamed: 0')

In [None]:
# Preview the first rows of df1 (read from the GDP history CSV) before cleaning.
df1.head()

In [None]:
def clean_data(column, pattern=r'[\$,B%]'):
    """Strip formatting characters from a column and convert it to numbers.

    Parameters
    ----------
    column : pandas.Series
        Column whose values may carry formatting characters.
    pattern : str, optional
        Regular expression matching the characters to remove. The default
        removes '$', ',', 'B' and '%'.

    Returns
    -------
    pandas.Series
        The column converted with ``pd.to_numeric``.

    Raises
    ------
    ValueError
        If a value still cannot be parsed as a number after stripping.
    """
    # Raw string: '\$' in a normal string literal is an invalid escape
    # sequence and triggers a warning on recent Python versions.
    return pd.to_numeric(column.replace(pattern, '', regex=True))

# Convert the formatted text columns of the GDP table to numeric values.
for gdp_col in ('GDP', 'Per_Capita', 'Growth_Rate'):
    df1[gdp_col] = clean_data(df1[gdp_col])

In [None]:
# Summary statistics of df1's numeric columns after cleaning.
df1.describe()

In [None]:
# Yearly GDP trend, drawn on an explicit Axes object.
fig, ax = plt.subplots(figsize=(15, 9))
sns.lineplot(data=df1, x='year', y='GDP', marker='o', color='pink', ax=ax)
ax.set_title('GDP of India Y-o-Y')
ax.set_xlabel('Year')
ax.set_ylabel('GDP (in billion)')
ax.grid(True)
plt.show()

In [None]:
# Bucket every year into its decade (e.g. 1987 -> 1980); the 'decade'
# column is reused by the per-capita decade plot in a later cell.
df1['decade'] = (df1['year'] // 10) * 10

# Mean growth rate for each decade.
decade_group = df1.groupby('decade')['Growth_Rate'].mean().reset_index()

# Plot the data. Title and axis labels are set so the figure stands alone.
plt.figure(figsize=(15, 9))
sns.barplot(x='decade', y='Growth_Rate', data=decade_group, color='pink')
plt.title('Average GDP Growth Rate per Decade')
plt.xlabel('Decade')
plt.ylabel('Average Growth Rate (in percentage)')
plt.show()

India's economic growth saw a dip below 3% in the 1970s but recovered in the following decades, reaching above 6% in the 2000s and 2010s. The global slowdown of 2020 impacted the economy, and the future growth trajectory remains to be seen.

In [None]:
# Year-on-year GDP per capita.
plt.figure(figsize=(15, 9))
ax = sns.lineplot(data=df1, x='year', y='Per_Capita', color='pink', marker='o')
ax.set(
    title="GDP Per capita of India Year-on-Year",
    xlabel='Year',
    ylabel='GDP (in dollars)',
)
ax.grid(True)
plt.show()

In [None]:
# Mean GDP per capita for each decade (relies on the 'decade' column
# added to df1 in the growth-rate cell above).
decade_group = df1.groupby('decade')['Per_Capita'].mean().reset_index()

plt.figure(figsize=(15, 9))
sns.barplot(x='decade', y='Per_Capita', data=decade_group, color='pink')
plt.title('Average Per Capita GDP per Decade')
plt.xlabel('Decade')
# Per-capita GDP is in dollars (as in the yearly per-capita plot), not billions.
plt.ylabel('Average GDP per capita (in dollars)')
plt.show()

India's per capita income soared from below \$1,000 in the 2000s to over \$2,000 in the 2010s, potentially due to the IT sector's rise. While GDP growth is a key metric, it is crucial to analyze inflation alongside per capita income to assess whether this income growth effectively translates to improved purchasing power for Indian citizens.

In [None]:
# Preview the first rows of df2 (read from the inflation history CSV) before cleaning.
df2.head()

In [None]:
def clean_data(column, pattern=r'[\$,B%]'):
    """Strip formatting characters from a column and convert it to numbers.

    This cell redefines ``clean_data``. The default pattern therefore also
    covers '$' and ',' (not only 'B' and '%'), so the GDP cleaning cell
    still works if it is re-run after this definition.

    Parameters
    ----------
    column : pandas.Series
        Column whose values may carry formatting characters.
    pattern : str, optional
        Regular expression matching the characters to remove. The default
        removes '$', ',', 'B' and '%'.

    Returns
    -------
    pandas.Series
        The column converted with ``pd.to_numeric``.

    Raises
    ------
    ValueError
        If a value still cannot be parsed as a number after stripping.
    """
    return pd.to_numeric(column.replace(pattern, '', regex=True))

# Convert the formatted text columns of the inflation table to numeric values.
for inflation_col in ('Inflation_Rate', 'Annual_percent_geowth'):
    df2[inflation_col] = clean_data(df2[inflation_col])


In [None]:
# Summary statistics of df2's numeric columns after cleaning.
df2.describe()

In [None]:
# Year-on-year inflation rate with a reference line at zero.
fig, ax = plt.subplots(figsize=(15, 9))
sns.lineplot(data=df2, x='year', y='Inflation_Rate', color='pink', marker='o', ax=ax)
ax.set_title("Inflation rate of India Year-on-Year")
ax.set_xlabel('Year')
ax.set_ylabel('Inflation (in percentage)')
ax.grid(True)
# The horizontal line at y = 0 separates positive from negative inflation values.
ax.axhline(y=0, color='purple')
plt.show()

The 1970s stand out as a decade of extremes, recording both the nation's highest and lowest inflation rates (occurring in 1974 and 1976, respectively).

In [None]:
# Index labels of the rows holding the maximum and minimum inflation rate.
highest_inflation_rate = df2['Inflation_Rate'].idxmax()
lowest_inflation_rate = df2['Inflation_Rate'].idxmin()

# Full rows for those two labels.
year_high = df2.loc[highest_inflation_rate]
year_low = df2.loc[lowest_inflation_rate]

print(year_high, year_low, sep='\n')

In [None]:
# Bucket every year into its decade (e.g. 1987 -> 1980).
df2['decade'] = (df2['year'] // 10) * 10

# Mean inflation rate for each decade.
decade_group = df2.groupby('decade')['Inflation_Rate'].mean().reset_index()

plt.figure(figsize=(15, 9))
sns.barplot(x='decade', y='Inflation_Rate', data=decade_group, color='pink')
plt.title('Average Inflation Rate per Decade')
plt.xlabel('Decade')
# The plotted value is the inflation rate, not the growth rate.
plt.ylabel('Average Inflation Rate (in percentage)')
plt.show()

The 1990s were characterized by a relatively elevated inflation rate, approaching 10%. However, a subsequent period of effective policy measures in the 2000s demonstrably curbed inflation, driving it down below 5%. While inflationary pressures reemerged somewhat in the 2010s, exceeding 6%, the overall trajectory reflects a commendable improvement in price stability.

In [None]:
# 3D scatter of inflation rate (x) against year (y) and the
# 'Annual_percent_geowth' column (z); points are coloured by their z value.
ax = plt.axes(projection='3d')
ydata = df2['year']
xdata = df2['Inflation_Rate']
zdata = df2['Annual_percent_geowth']
ax.scatter3D(xdata, ydata, zdata, c=zdata, cmap='pink')

# Label every axis so the three dimensions can be told apart.
ax.set_xlabel('Inflation Rate (%)')
ax.set_ylabel('Year')
# NOTE(review): label taken from the dual-axis plot in the next cell — confirm
# that 'Annual_percent_geowth' is the annual change in the inflation rate.
ax.set_zlabel('Inflation Change Rate (%)')
ax.set_title('Inflation Rate vs Year vs Inflation Change Rate')
plt.show()

In [None]:
# Join the GDP and inflation tables on their shared 'year' column.
df_combined = pd.merge(df1, df2, on='year')
# Sort chronologically so both lines are drawn left to right.
# (The original used '+' where '=' was intended, so the sort was discarded.)
df_combined = df_combined.sort_values(by='year')

# Left y-axis: GDP.
fig, ax1 = plt.subplots(figsize=(15, 9))
color = 'tab:pink'
ax1.set_xlabel('Year')
ax1.set_ylabel('GDP', color=color)
ax1.plot(df_combined['year'], df_combined['GDP'], color=color)
ax1.tick_params(axis='y', labelcolor=color)

# Right y-axis sharing the same x-axis: the 'Annual_percent_geowth' series.
ax2 = ax1.twinx()
color = 'tab:purple'
ax2.set_ylabel('Inflation Change Rate (%)', color=color)
ax2.plot(df_combined['year'], df_combined['Annual_percent_geowth'], color=color)
ax2.tick_params(axis='y', labelcolor=color)

plt.title('GDP vs Inflation Change Rate')
plt.show()





In [None]:
# Machine learning: statistical equations solved by a computer.
# Linear model: y = B0 + B1x1 + c
# NOTE(review): presumably B0 is the intercept, B1 the coefficient of x1
# and c the error term — confirm.

# Print df1's columns, dtypes and non-null counts.
df1.info()

In [None]:
# Show the first rows of df1 in its current state.
df1.head()