### Importing Libraries

In [None]:
# --- Basic imports ---
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

# --- ML imports ---
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.linear_model import LinearRegression
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score

# --- Display settings ---
# Show every column when a DataFrame is rendered (None lifts the column limit).
pd.set_option('display.max_columns', None)
# set_theme() is seaborn's preferred interface; sns.set() is merely an alias
# for it that the seaborn docs say may be removed. Arguments are unchanged.
sns.set_theme(style="whitegrid", palette="pastel")

print("Successfully imported all libraries.")

### Loading Dataset

In [None]:
# Read the CSV (path is relative to the notebook's working directory) into `df`
df = pd.read_csv(filepath_or_buffer="../data/data.csv")

# Confirm the load and report the (rows, columns) dimensions
print("Dataset loaded successfully!")
print("Shape: {}".format(df.shape))

# Preview the first five rows as the cell's rendered output
df.head(5)

Dataset loaded successfully!
Shape: (1341, 6)


Unnamed: 0,LOCATION,TIME,PC_HEALTHXP,PC_GDP,USD_CAP,TOTAL_SPEND
0,AUS,1971,15.992,0.726,33.99,439.73
1,AUS,1972,15.091,0.685,34.184,450.44
2,AUS,1973,15.117,0.681,37.956,507.85
3,AUS,1974,14.771,0.754,45.338,622.17
4,AUS,1975,11.849,0.682,44.363,616.34


In [None]:
# Column dtypes and non-null counts; info() writes its report straight to stdout
df.info()

# Number of missing entries in each column, printed under its own heading
print("\n--- Missing Values ---", df.isna().sum(), sep="\n")

# Summary statistics (count, mean, std, min, quartiles, max) for numeric columns,
# rendered through IPython's rich display rather than plain text
print("\n--- Descriptive Statistics ---")
display(df.describe())


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1341 entries, 0 to 1340
Data columns (total 6 columns):
 #   Column       Non-Null Count  Dtype  
---  ------       --------------  -----  
 0   LOCATION     1341 non-null   object 
 1   TIME         1341 non-null   int64  
 2   PC_HEALTHXP  1341 non-null   float64
 3   PC_GDP       1341 non-null   float64
 4   USD_CAP      1341 non-null   float64
 5   TOTAL_SPEND  1341 non-null   float64
dtypes: float64(4), int64(1), object(1)
memory usage: 63.0+ KB

--- Missing Values ---
LOCATION       0
TIME           0
PC_HEALTHXP    0
PC_GDP         0
USD_CAP        0
TOTAL_SPEND    0
dtype: int64

--- Descriptive Statistics ---


Unnamed: 0,TIME,PC_HEALTHXP,PC_GDP,USD_CAP,TOTAL_SPEND
count,1341.0,1341.0,1341.0,1341.0,1341.0
mean,2001.134228,16.632877,1.215524,338.278398,14439.322088
std,14.093659,6.61332,0.479568,226.158326,43259.358895
min,1970.0,5.545,0.207,3.628,5.81
25%,1990.0,11.849,0.802,144.592,756.28
50%,2004.0,15.238,1.187,317.795,2786.19
75%,2013.0,20.58,1.524,496.858,8794.36
max,2022.0,40.239,2.87,1432.309,475596.74
