In [1]:
import os

# Root directory of the project, relative to the current working directory
project_name = "india_economy_project"

# Subdirectories that make up the project skeleton
subfolders = ["data", "notebooks", "scripts"]

# Create the root first, then every subfolder beneath it.
# exist_ok=True means re-running this cell is harmless when the folders exist.
project_dirs = [project_name] + [os.path.join(project_name, folder) for folder in subfolders]
for directory in project_dirs:
    os.makedirs(directory, exist_ok=True)

print("Project folders created")

Project folders created


In [2]:
# Create a README.md file in the project root.
# The encoding is given explicitly so the file content does not depend on the
# platform's locale-dependent default text encoding.
readme_path = os.path.join(project_name, "README.md")
with open(readme_path, "w", encoding="utf-8") as f:
    f.write("# India's Economy Since Independence\n\n")
    f.write("This project analyses India's economy from 1947 to 2025 using Python.\n")

print("README.md created")


README.md created


In [3]:
import subprocess

# Move into the project folder.
# Guarded so that re-running this cell (when the kernel is already inside the
# project folder) does not attempt a second chdir into a non-existent
# "<project>/<project>" directory.
if os.path.basename(os.getcwd()) != project_name:
    os.chdir(project_name)

# Run the git commands in order and keep each exit code: subprocess.run does
# not raise on a non-zero exit status unless check=True is passed, so failures
# would otherwise go unnoticed.
git_commands = [
    ["git", "init"],
    ["git", "add", "."],
    ["git", "commit", "-m", "Initial commit - project structure created"],
]
return_codes = [subprocess.run(command).returncode for command in git_commands]

# Report success only when every command actually succeeded
if all(code == 0 for code in return_codes):
    print("Git repository initialized and first commit made")
else:
    print(f"Git setup did not complete cleanly (exit codes: {return_codes})")

Git repository initialized and first commit made


In [4]:
import subprocess

# Link project to GitHub

github_repo_url = "https://github.com/RiddhiGaglani/bee2041-final-project.git"

# `git remote add` fails when a remote named "origin" already exists (for
# example when this cell is re-run), so update its URL in that case instead.
existing_remotes = subprocess.run(
    ["git", "remote"], capture_output=True, text=True
).stdout.split()
remote_action = "set-url" if "origin" in existing_remotes else "add"
subprocess.run(["git", "remote", remote_action, "origin", github_repo_url])

# Rename the current branch to "main"
subprocess.run(["git", "branch", "-M", "main"])

# Push and set the upstream; subprocess.run does not raise on failure, so the
# exit code is checked explicitly and a failed push is made visible.
push_result = subprocess.run(["git", "push", "-u", "origin", "main"])
if push_result.returncode != 0:
    print(f"git push failed (exit code {push_result.returncode}); the remote was not updated")

# Keep the CompletedProcess as the cell's displayed result
push_result

CompletedProcess(args=['git', 'push', '-u', 'origin', 'main'], returncode=1)

In [5]:
# Import Data

import wbdata
import pandas as pd
import datetime
import os

# Country code passed to wbdata for India
country_code = "IN"

# Column label -> World Bank indicator code
indicators = {
    'GDP (current US$)': 'NY.GDP.MKTP.CD',
    'Inflation (annual %)': 'FP.CPI.TOTL.ZG',
    'Population': 'SP.POP.TOTL'
}

# Date window requested from the API
start_date = datetime.datetime(1947, 1, 1)
end_date = datetime.datetime(2024, 12, 31)

# Download one series per indicator, keyed by its column label
all_data = {}
for label, indicator_code in indicators.items():
    print(f"Downloading {label}...")
    all_data[label] = wbdata.get_series(
        indicator_code, country_code, date=(start_date, end_date)
    )

# Combine the series into a single DataFrame and store the raw download
df = pd.DataFrame(all_data)

# NOTE(review): this path is resolved against the current working directory,
# which an earlier cell changes with os.chdir -- confirm it points at the
# intended data folder.
data_folder = "../data"
os.makedirs(data_folder, exist_ok=True)
csv_path = os.path.join(data_folder, "india_economic_data.csv")
df.to_csv(csv_path, index=True)

print(f"Data downloaded and saved to {csv_path}")

# Read the saved file back; the index written above is now an ordinary
# column (the sort below relies on a "date" column being present)
df = pd.read_csv("../data/india_economic_data.csv")

print(df.head())

# Report how many values are missing in each column
print("\nMissing values per column:")
print(df.isnull().sum())

# Order rows by date and give the rescaled columns unit-bearing names
df = df.sort_values("date").rename(columns={
    "GDP (current US$)": "GDP (Billion US$)",
    "Population": "Population (Million)"
})
df["GDP (Billion US$)"] = df["GDP (Billion US$)"] / 1e9        # US$ -> billions of US$
df["Population (Million)"] = df["Population (Million)"] / 1e6  # persons -> millions

# Write the cleaned dataset without the positional index
df.to_csv("../data/india_economic_data_clean.csv", index=False)

print("\n Data cleaned and saved to india_economic_data_clean.csv")



Downloading GDP (current US$)...
Downloading Inflation (annual %)...
Downloading Population...
Data downloaded and saved to ../data\india_economic_data.csv
   date  GDP (current US$)  Inflation (annual %)    Population
0  2024                NaN              4.953036           NaN
1  2023       3.567552e+12              5.649143  1.438070e+09
2  2022       3.353470e+12              6.699034  1.425423e+09
3  2021       3.167271e+12              5.131407  1.414204e+09
4  2020       2.674852e+12              6.623437  1.402618e+09

Missing values per column:
date                    0
GDP (current US$)       1
Inflation (annual %)    0
Population              1
dtype: int64

 Data cleaned and saved to india_economic_data_clean.csv
