In [1]:
# Import necessary libraries
import pandas as pd  # For handling datasets
import numpy as np  # For numerical operations
from sklearn.model_selection import train_test_split  # For splitting the dataset
from sklearn.ensemble import GradientBoostingRegressor  # Machine Learning model for regression
from sklearn.metrics import mean_absolute_error, r2_score  # Evaluation metrics

In [2]:
# Load the dataset from a CSV file
data = pd.read_csv("C:/Users/Minusha Attygala/OneDrive/Documents/Big Data Practicals/sl_inflation.csv")

In [3]:
# Display the first few rows of the dataset to check data structure
data.head()

Unnamed: 0,Year,Interest_Rate (%),Exchange_Rate (LKR/USD),Fuel_Price (LKR/litre),Food_Price_Index,Govt_Spending (Billion LKR),Imports (Billion LKR),Exports (Billion LKR),Inflation_Rate (%)
0,2018,8.5,155,120,110,2500,4000,2000,4.5
1,2019,7.0,180,135,115,2700,4100,2100,5.1
2,2020,6.0,185,140,120,2900,3800,2200,5.8
3,2021,5.5,200,145,125,3200,3500,2300,6.3
4,2022,15.0,365,420,250,4500,3200,2500,55.0


In [4]:
# Selecting independent (X) and dependent (y) variables
x = data.drop(columns=["Year", "Inflation_Rate (%)"])  # Features (excluding Year and target column)
y = data["Inflation_Rate (%)"]  # Target variable (Inflation Rate)

In [5]:
# Splitting the dataset into training and testing sets (80% training, 20% testing)
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=42)

In [6]:
# Initializing and training the Gradient Boosting Regressor model
model = GradientBoostingRegressor(n_estimators=100, learning_rate=0.1, max_depth=3, random_state=42)
model.fit(x_train, y_train)  # Fit the model to training data

- n_estimators = Number of decision trees need to draw for the final result.
- learning_rate =  It represents the amount of percentage must be included for the final model. (10%)
- max_depth = Number of levels of the decision tree.
- random_state = To ensure the reproductibility of the tree

* From each tree that it drwas 10% of it's insights must be present in the final result. To get the final result we are doing 100 experiments.

In [7]:
# Making predictions on the test set
predictions = model.predict(x_test)

In [8]:
# Evaluating the model's performance using R² score
r2 = r2_score(y_test, predictions)
print(f"Accuracy = {r2:.2f}")  # Display the accuracy of the model

Accuracy = 0.92


In [9]:
# Predicting inflation rate for a future year (hypothetical input data)
next_year = np.array([[8.2, 255, 260, 300, 3900, 3500, 2400]])  # Feature values for prediction
future = model.predict(next_year)  # Predict inflation rate
print(f"Inflation Rate in 2029 = {future[0]:.2f}%")  # Display the predicted value

Inflation Rate in 2029 = 23.94%


