# Week 8 Assignment


Rishav Chaudhary

Presidential Graduate School, Kathmandu, Nepal

PRG 330: Python Programming for Data

Professor Tek Raj Panta

December 11, 2024

# GDP Growth Prediction

## Objective
**Predict Nepal's GDP growth rate based on economic indicators.**

## Loading and Exploring Data

In [18]:
import pandas as pd

# Read the macro-economic indicator workbook into a DataFrame.
gdp = pd.read_excel(io="data/macro-economic-indicators/GDP.xlsx")

# Preview the first five rows to confirm the load worked.
print(gdp.head(n=5))

   Year       GDP  Per Capita  Growth  Trade Balance (USD Billion)  % of GDP  \
0  1965  0.735267     67.0075 -1.2032                    -0.045282   -6.1585   
1  1966  0.906812     81.1644  7.0406                    -0.028219   -3.1119   
2  1967  0.841974     73.9679 -1.5715                    -0.000656   -0.0779   
3  1968  0.772229     66.5489  0.6755                    -0.005060   -0.6552   
4  1969  0.788642     66.6383  4.4634                    -0.025284   -3.2060   

   Contribution of Manufactoring Industry to GDP  \
0                                       3.284541   
1                                       3.430308   
2                                       3.413874   
3                                       3.568939   
4                                       3.844709   

   No of People Going for Foreign Employment  
0                                          0  
1                                          0  
2                                          0  
3                 

## Data Preparation and Data Cleaning

In [19]:
# Map the workbook's verbose headers to the snake_case names used by the rest
# of the notebook. Keys must match the workbook headers exactly, including the
# "Manufactoring" spelling that appears in the source data.
column_renames = {
    "Per Capita": "per_capita",
    "Trade Balance (USD Billion)": "trade_balance",
    "% of GDP": "percent_of_gdp",
    "Contribution of Manufactoring Industry to GDP": "manufacturing_industry_contrib_to_gdp",
    "No of People Going for Foreign Employment": "people_going_foreign_employment",
}

# Rebind the name instead of using `inplace=True`, so the cell is a plain
# transformation of its input and gives the same result when re-run.
gdp = gdp.rename(columns=column_renames)

# `rename` ignores keys it cannot find, so check the outcome explicitly: a
# mismatched header should fail here rather than as a KeyError further down.
missing_columns = set(column_renames.values()) - set(gdp.columns)
assert not missing_columns, f"Columns missing after rename: {sorted(missing_columns)}"

print(gdp.head())

   Year       GDP  per_capita  Growth  trade_balance  percent_of_gdp  \
0  1965  0.735267     67.0075 -1.2032      -0.045282         -6.1585   
1  1966  0.906812     81.1644  7.0406      -0.028219         -3.1119   
2  1967  0.841974     73.9679 -1.5715      -0.000656         -0.0779   
3  1968  0.772229     66.5489  0.6755      -0.005060         -0.6552   
4  1969  0.788642     66.6383  4.4634      -0.025284         -3.2060   

   manufacturing_industry_contrib_to_gdp  people_going_foreign_employment  
0                               3.284541                                0  
1                               3.430308                                0  
2                               3.413874                                0  
3                               3.568939                                0  
4                               3.844709                                0  


In [20]:
# Count the missing entries in each column.
missing_per_column = gdp.isna().sum()
print(missing_per_column)

Year                                     0
GDP                                      0
per_capita                               0
Growth                                   0
trade_balance                            0
percent_of_gdp                           0
manufacturing_industry_contrib_to_gdp    0
people_going_foreign_employment          0
dtype: int64


In [21]:
# Count rows that are exact duplicates of an earlier row.
duplicate_row_count = gdp.duplicated().sum()
print(duplicate_row_count)

0


## Feature Engineering

### Selecting features and target

In [22]:
# Predictor columns fed to every model below.
feature_columns = [
    "trade_balance",
    "percent_of_gdp",
    "manufacturing_industry_contrib_to_gdp",
]
x = gdp[feature_columns]

# Regression target: the "Growth" column.
y = gdp["Growth"]

# Sanity check: number of missing values per predictor.
print(x.isna().sum())

trade_balance                            0
percent_of_gdp                           0
manufacturing_industry_contrib_to_gdp    0
dtype: int64


### Splitting the data into training and testing sets

In [23]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the rows for testing; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.2,
    random_state=42,
)

### Scaling the features

In [24]:
from sklearn.preprocessing import StandardScaler

# Fit the scaler on the training split only, then apply the same transformation
# to both splits so the test data never influences the scaling.
scaler = StandardScaler()
scaler.fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

## Model Implementation
- Linear Regression
- Random Forest Regressor
- Gradient Boosting Regressor

### Linear Regression

In [25]:
from sklearn.linear_model import LinearRegression

# Linear regression, trained on the scaled features and the unscaled target.
linear_model = LinearRegression()
linear_model.fit(X=X_train_scaled, y=y_train)

# Predictions for the held-out test split (scaled with the same scaler).
y_pred_linear = linear_model.predict(X=X_test_scaled)


### Random Forest Regressor

In [26]:
from sklearn.ensemble import RandomForestRegressor

# Random forest with a fixed seed, trained on the unscaled training features.
# `fit` returns the estimator itself, so construction and training are chained.
random_forest = RandomForestRegressor(random_state=42).fit(X_train, y_train)

# Predictions for the held-out test split.
y_pred_random_forest = random_forest.predict(X_test)

### Gradient Boosting Regressor


In [27]:
from sklearn.ensemble import GradientBoostingRegressor

# Gradient boosting with a fixed seed, trained on the unscaled training features.
# `fit` returns the estimator itself, so construction and training are chained.
gradient_boosting = GradientBoostingRegressor(random_state=42).fit(X_train, y_train)

# Predictions for the held-out test split.
y_pred_gradient_boosting = gradient_boosting.predict(X_test)

## Model Evaluation

### Linear Regression

In [28]:
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
import numpy as np

# Score the linear model on the held-out split.
mae_linear = mean_absolute_error(y_test, y_pred_linear)
mse_linear = mean_squared_error(y_test, y_pred_linear)
rmse_linear = np.sqrt(mse_linear)  # RMSE is the square root of the MSE
r2_linear = r2_score(y_test, y_pred_linear)

# One line per metric, rounded to four decimals.
print(
    "Linear Regression Evaluation:",
    f"MAE: {mae_linear:.4f}",
    f"RMSE: {rmse_linear:.4f}",
    f"R²: {r2_linear:.4f}",
    sep="\n",
)

Linear Regression Evaluation:
MAE: 1.4752
RMSE: 2.5952
R²: -0.1775


### Random Forest Regressor

In [29]:
# Score the random forest on the held-out split.
mae_rf = mean_absolute_error(y_test, y_pred_random_forest)
mse_rf = mean_squared_error(y_test, y_pred_random_forest)
rmse_rf = np.sqrt(mse_rf)  # RMSE is the square root of the MSE
r2_rf = r2_score(y_test, y_pred_random_forest)

# One line per metric, rounded to four decimals (the header starts with a blank line).
print(
    "\nRandom Forest Evaluation:",
    f"MAE: {mae_rf:.4f}",
    f"RMSE: {rmse_rf:.4f}",
    f"R²: {r2_rf:.4f}",
    sep="\n",
)


Random Forest Evaluation:
MAE: 1.7090
RMSE: 2.6075
R²: -0.1886


### Gradient Boosting Regressor


In [30]:
# Score the gradient boosting model on the held-out split.
mae_gb = mean_absolute_error(y_test, y_pred_gradient_boosting)
mse_gb = mean_squared_error(y_test, y_pred_gradient_boosting)
rmse_gb = np.sqrt(mse_gb)  # RMSE is the square root of the MSE
r2_gb = r2_score(y_test, y_pred_gradient_boosting)

# One line per metric, rounded to four decimals (the header starts with a blank line).
print(
    "\nGradient Boosting Evaluation:",
    f"MAE: {mae_gb:.4f}",
    f"RMSE: {rmse_gb:.4f}",
    f"R²: {r2_gb:.4f}",
    sep="\n",
)


Gradient Boosting Evaluation:
MAE: 2.0708
RMSE: 3.3187
R²: -0.9255
