## Dependencies

In [16]:
import pandas as pd
import statsmodels.api as sm
import os
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.linear_model import LinearRegression
import warnings
warnings.filterwarnings("ignore")

## Data Extraction

In [None]:
# Load the merged India dataset; the path is relative to the current working directory.
merged_path = os.path.join("..", "data", "india", "merged_india.xlsx")
df = pd.read_excel(merged_path)

# Normalise headers: trim whitespace, spaces -> underscores, lower-case.
# str() guards against non-string headers (e.g. a numeric column label),
# which have no .strip() method.
df.columns = [str(col).strip().replace(" ", "_").lower() for col in df.columns]

# Rename for clarity.
# NOTE(review): "trade_deficit" is renamed from the trade *balance* column, so
# negative values presumably denote a deficit -- confirm the intended sign convention.
df = df.rename(columns={
    "cpi_data": "inflation_rate",
    "manufacturing_(%_age_of_gdp)": "manufacturing_output",
    "trade_balance_(%_age_of_gdp)": "trade_deficit",
    "tariff_rate": "tariff"
})

# DataFrame.rename silently ignores keys that are not present, so a changed
# spreadsheet header would otherwise only surface later as a KeyError in
# another cell. Fail here with a message that names the missing columns.
required_columns = ["inflation_rate", "manufacturing_output", "trade_deficit", "tariff"]
missing_columns = [name for name in required_columns if name not in df.columns]
if missing_columns:
    raise KeyError(
        f"Expected columns missing from {merged_path} after renaming: {missing_columns}"
    )

# Preview the dataframe
print(df)


    year  inflation_rate  manufacturing_output  trade_deficit  tariff
0   2013            10.0                 15.25           0.00     0.0
1   2014             6.7                 15.06          -2.99     0.0
2   2015             4.9                 15.58          -2.30     7.5
3   2016             4.9                 15.16          -1.77     7.6
4   2017             3.3                 15.02          -3.16     6.8
5   2018             3.9                 14.88          -3.76     6.0
6   2019             3.7                 13.46          -2.58     8.8
7   2020             6.6                 14.12          -0.39     7.7
8   2021             5.1                 14.38          -2.62     7.3
9   2022             6.7                 13.12          -3.56     6.0
10  2023             5.6                 12.93          -2.07     0.0


## Defining The Variables

In [18]:
# Predictor: double brackets keep the tariff as a one-column DataFrame rather than a Series
X = df[["tariff"]]

# Responses: one Series per outcome that is regressed on the tariff
y_inflation, y_manufacturing, y_trade_balance = (
    df[column]
    for column in ("inflation_rate", "manufacturing_output", "trade_deficit")
)


## Statistical Analysis

In [19]:
# Fit one simple linear regression per outcome, each on the same tariff predictor
model_inflation, model_manufacturing, model_trade_balance = (
    LinearRegression().fit(X, response)
    for response in (y_inflation, y_manufacturing, y_trade_balance)
)


In [20]:
# Report the slope and intercept of each fitted scikit-learn model.
# Headings after the first carry a leading newline to separate the sections.
for heading, fitted in (
    ("Inflation Model:", model_inflation),
    ("\nManufacturing Output Model:", model_manufacturing),
    ("\nTrade Balance Model:", model_trade_balance),
):
    print(heading)
    print("Coefficient:", fitted.coef_)
    print("Intercept:", fitted.intercept_)


Inflation Model:
Coefficient: [-0.34639122]
Intercept: 7.398797579958653

Manufacturing Output Model:
Coefficient: [0.00443573]
Intercept: 14.427641675787426

Trade Balance Model:
Coefficient: [-0.07321917]
Intercept: -1.9068412531922654


In [21]:
# Design matrix shared by all three models: the tariff plus an explicit constant column
X_sm = sm.add_constant(X)

# Inflation regressed on the tariff
model_inf = sm.OLS(endog=y_inflation, exog=X_sm).fit()
print(model_inf.summary())

# Manufacturing output regressed on the tariff
model_manu = sm.OLS(endog=y_manufacturing, exog=X_sm).fit()
print(model_manu.summary())

# Trade balance regressed on the tariff
model_trade = sm.OLS(endog=y_trade_balance, exog=X_sm).fit()
print(model_trade.summary())


                            OLS Regression Results                            
Dep. Variable:         inflation_rate   R-squared:                       0.401
Model:                            OLS   Adj. R-squared:                  0.334
Method:                 Least Squares   F-statistic:                     6.023
Date:                Wed, 09 Apr 2025   Prob (F-statistic):             0.0365
Time:                        10:51:35   Log-Likelihood:                -19.280
No. Observations:                  11   AIC:                             42.56
Df Residuals:                       9   BIC:                             43.36
Df Model:                           1                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
const          7.3988      0.875      8.461      0.0

In [22]:
def save_ols_summary(x, y, data, filename):
    """Fit an OLS regression of ``data[y]`` on ``data[x]`` and write its summary to a text file.

    Parameters
    ----------
    x : str
        Column of ``data`` used as the single regressor (a constant is added).
    y : str
        Column of ``data`` used as the response.
    data : pandas.DataFrame
        Frame containing both columns.
    filename : str
        File name, joined onto ``../output/india``, that receives the summary text.

    Returns
    -------
    None
        The summary is only written to disk; nothing is returned.
    """
    X = sm.add_constant(data[[x]])
    y_vals = data[y]

    # Silence warnings raised while fitting, without touching the global filters.
    with warnings.catch_warnings():
        warnings.simplefilter("ignore")
        model = sm.OLS(y_vals, X).fit()

    summary_str = model.summary().as_text()

    output_path = os.path.join("..", "output", "india", filename)
    # On a fresh checkout the output directory may not exist yet; create it
    # instead of failing with FileNotFoundError.
    os.makedirs(os.path.dirname(output_path), exist_ok=True)
    # Explicit encoding so the written file does not depend on the platform default.
    with open(output_path, "w", encoding="utf-8") as f:
        f.write(summary_str)

In [23]:
# Write one OLS summary text file per outcome, each regressed on the tariff
for outcome, summary_file in (
    ("inflation_rate", "inflation_ols.txt"),
    ("manufacturing_output", "manufacturing_ols.txt"),
    ("trade_deficit", "trade_ols.txt"),
):
    save_ols_summary("tariff", outcome, df, summary_file)

## Saving Plots

In [24]:
def regression_plot_save(x, y, data, title, color, filename):
    """Plot ``data[y]`` against ``data[x]`` with a fitted regression line and save it as an image.

    An OLS model of ``y`` on ``x`` (plus a constant) is fitted to obtain the
    R-squared, slope and p-value shown in a text box on the figure. The
    scatter and regression line are drawn by ``seaborn.regplot`` without a
    confidence band.

    Parameters
    ----------
    x : str
        Column of ``data`` plotted on the x-axis and used as the regressor.
    y : str
        Column of ``data`` plotted on the y-axis and used as the response;
        also used verbatim as the y-axis label.
    data : pandas.DataFrame
        Frame containing both columns.
    title : str
        Figure title.
    color : str
        Colour passed to ``seaborn.regplot``.
    filename : str
        File name, joined onto ``../output/india``, that receives the figure.

    Returns
    -------
    None
        The figure is saved and then closed, so nothing is displayed or returned.

    Notes
    -----
    The x-axis label is fixed to ``"Tariff Rate (%)"`` regardless of ``x``.
    """
    X = sm.add_constant(data[[x]])
    y_vals = data[y]

    # Silence warnings raised while fitting, without touching the global filters.
    with warnings.catch_warnings():
        warnings.simplefilter("ignore")
        model = sm.OLS(y_vals, X).fit()

    r_squared = model.rsquared
    # params/pvalues are Series labelled by regressor name. Integer keys on a
    # string-labelled Series are deprecated positional access in pandas, so
    # look the regressor up by its label instead.
    p_val = model.pvalues[x]
    slope = model.params[x]

    # Explicit figure/axes so that exactly this figure is saved and closed.
    fig, ax = plt.subplots()
    sns.regplot(x=x, y=y, data=data, ci=None, color=color, ax=ax)
    ax.set_title(title)
    ax.set_xlabel("Tariff Rate (%)")
    ax.set_ylabel(y)

    # Add textbox (axes coordinates: top-left corner of the plot area)
    textstr = f"$R^2$ = {r_squared:.3f}\nSlope = {slope:.3f}\np = {p_val:.3f}"
    ax.text(0.05, 0.95, textstr, transform=ax.transAxes,
            fontsize=10, verticalalignment='top',
            bbox=dict(boxstyle='round,pad=0.4', facecolor='white', alpha=0.5))

    # Save the figure
    output_path = os.path.join("..", "output", "india", filename)
    # On a fresh checkout the output directory may not exist yet; create it
    # instead of failing inside savefig.
    os.makedirs(os.path.dirname(output_path), exist_ok=True)
    fig.tight_layout()
    fig.savefig(output_path)
    plt.close(fig)  # Close to avoid displaying.



In [25]:
# Save one regression plot per outcome, each plotted against the tariff
for outcome, plot_title, line_color, image_file in (
    ("inflation_rate", "Tariff vs Inflation Rate (CPI)", "blue", "inflation_plot.png"),
    ("manufacturing_output", "Tariff vs Manufacturing Output", "green", "manufacturing_plot.png"),
    ("trade_deficit", "Tariff vs Trade Deficit", "red", "trade_plot.png"),
):
    regression_plot_save("tariff", outcome, df, plot_title, line_color, image_file)