### **Extracting Files**

In [20]:

import os
import zipfile
import pandas as pd

# Archive as downloaded, and the folder its contents are unpacked into.
zip_filename = "archive (2).zip"
extract_dir = "weather_data"

# Unpack the whole archive; the target folder is created if it is missing
# and reused when the cell is re-run.
os.makedirs(extract_dir, exist_ok=True)
with zipfile.ZipFile(zip_filename, mode="r") as archive:
    archive.extractall(path=extract_dir)
print("Files extracted!")

# Read the monthly typhoon table from the extracted CSV.
file_path = os.path.join(extract_dir, "philippines_typhoon_monthly_2014_2024.csv")
data = pd.read_csv(file_path)




Files extracted!


### **Loading the Dataset**

In [21]:
# Print a banner, then show the first rows of the loaded table.
# NOTE(review): the characters before "RAW DATA OVERVIEW" look like a
# mis-decoded emoji -- confirm the notebook's file encoding.
print("ðŸ“Š RAW DATA OVERVIEW")
display(data.head())

ðŸ“Š RAW DATA OVERVIEW


Unnamed: 0,Month,Number_of_Typhoons,ONI,Nino3.4_SST_anomaly,Western_Pacific_SST,Midlevel_Humidity,SeaLevelPressure,MJO_Phase,Prev_month_typhoons,Vertical_Wind_Shear
0,1,1,0.03,-0.05,0.36,54.8,1009.8,0,0,13.2
1,2,0,0.2,0.18,-0.43,59.5,1009.5,7,1,13.8
2,3,0,0.02,0.0,0.01,49.3,1009.4,6,0,13.0
3,4,0,0.13,0.07,-0.15,57.8,1008.2,0,0,10.7
4,5,0,-0.03,-0.05,-0.41,61.4,1008.1,3,0,11.1


### **Checking for Missing Values**

In [22]:

# Count missing values per column.
data.isnull().sum()


Month                  0
Number_of_Typhoons     0
ONI                    0
Nino3.4_SST_anomaly    0
Western_Pacific_SST    0
Midlevel_Humidity      0
SeaLevelPressure       0
MJO_Phase              0
Prev_month_typhoons    0
Vertical_Wind_Shear    0
dtype: int64

### **Getting the Inputs and Outputs**

### Independent Variables

In [23]:
# Features: every column except the last one.
X = data.iloc[:, :-1]
# Preview the first rows of the feature table.
X.head()


Unnamed: 0,Month,Number_of_Typhoons,ONI,Nino3.4_SST_anomaly,Western_Pacific_SST,Midlevel_Humidity,SeaLevelPressure,MJO_Phase,Prev_month_typhoons
0,1,1,0.03,-0.05,0.36,54.8,1009.8,0,0
1,2,0,0.2,0.18,-0.43,59.5,1009.5,7,1
2,3,0,0.02,0.0,0.01,49.3,1009.4,6,0
3,4,0,0.13,0.07,-0.15,57.8,1008.2,0,0
4,5,0,-0.03,-0.05,-0.41,61.4,1008.1,3,0


### Dependent Variables

In [24]:
# Target: the last column of the table.
y = data.iloc[:, -1]
# Preview the first target values.
y.head()


0    13.2
1    13.8
2    13.0
3    10.7
4    11.1
Name: Vertical_Wind_Shear, dtype: float64

### **Scaling**

In [25]:
from sklearn.preprocessing import StandardScaler

# Standardise every feature column with a StandardScaler fitted on X.
# NOTE(review): in the cells visible here, X_scaled is never used afterwards;
# the train/test split and the model work on the unscaled X. The scaler is
# also fitted on all rows before the split, so wiring X_scaled into the
# model as-is would leak test-set statistics into training. Confirm whether
# scaling is intended at all.
scaler = StandardScaler()
scaler.fit(X)
X_scaled = scaler.transform(X)

### **Creating the Training Set and the Test Set**

In [26]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the rows for evaluation; the fixed seed keeps the
# partition the same on every run.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=0,
)

In [27]:
# Display the training features.
X_train

Unnamed: 0,Month,Number_of_Typhoons,ONI,Nino3.4_SST_anomaly,Western_Pacific_SST,Midlevel_Humidity,SeaLevelPressure,MJO_Phase,Prev_month_typhoons
63,4,0,0.56,0.52,0.57,64.7,1007.7,0,0
71,12,0,0.66,0.59,-0.10,54.8,1009.5,0,3
86,3,0,-0.91,-0.94,-0.28,58.2,1009.4,4,1
33,10,5,-0.67,-0.63,-0.26,64.6,1004.5,0,2
129,10,2,0.99,0.94,0.31,59.2,1005.7,6,3
...,...,...,...,...,...,...,...,...,...
9,10,4,-0.01,0.07,-0.32,60.7,1004.6,8,2
103,8,1,-1.25,-1.27,0.13,67.6,1004.5,5,5
67,8,1,0.64,0.70,-0.29,73.8,1004.6,3,1
117,10,2,0.17,0.20,-0.11,60.9,1005.1,3,5


In [28]:
# Display the held-out test features.
X_test

Unnamed: 0,Month,Number_of_Typhoons,ONI,Nino3.4_SST_anomaly,Western_Pacific_SST,Midlevel_Humidity,SeaLevelPressure,MJO_Phase,Prev_month_typhoons
93,10,3,-1.07,-0.96,0.63,65.2,1006.2,1,2
66,7,1,0.7,0.73,-0.11,68.5,1006.1,8,4
26,3,0,-0.53,-0.46,-0.41,51.0,1008.4,0,0
8,9,2,-0.11,-0.05,0.33,73.1,1004.7,3,2
30,7,1,-0.47,-0.49,-0.44,72.1,1004.9,3,3
91,8,3,-0.87,-0.9,0.13,69.8,1005.5,4,0
109,2,0,0.02,-0.03,0.32,62.0,1008.3,6,2
100,5,1,-1.15,-1.19,-0.5,63.2,1007.7,5,1
44,9,2,-0.09,-0.14,-0.02,67.6,1003.4,7,2
101,6,1,-1.32,-1.47,-0.03,75.7,1004.5,6,1


In [29]:
# Display the training targets.
y_train

63     10.9
71     11.6
86     12.8
33      9.0
129     9.1
       ... 
9       9.4
103     8.1
67      7.9
117     9.3
47     12.3
Name: Vertical_Wind_Shear, Length: 105, dtype: float64

In [30]:
# Display the held-out test targets.
y_test

93      9.0
66      8.6
26     13.0
8       6.9
30      8.2
91      8.8
109    12.7
100    10.8
44      8.1
101     8.5
99     10.4
56      8.5
43      8.3
96     12.3
22      9.5
78      8.0
95     12.4
97     12.3
7       8.0
24     12.3
61     13.7
10     10.4
108    13.1
45      8.6
16     11.4
2      13.0
68      8.5
Name: Vertical_Wind_Shear, dtype: float64

## **Building the Training Model**

### **Building the Model**

In [31]:
from sklearn.linear_model import LinearRegression

# Linear regression estimator with default settings; it is fitted in a later cell.
model = LinearRegression()


### **Training the Model**

In [32]:
# Fit the regression on the training split only.
model.fit(X_train, y_train)

0,1,2
,fit_intercept,True
,copy_X,True
,tol,1e-06
,n_jobs,
,positive,False


### **Inference**

In [33]:
# Predict the target for every row of the held-out test features.
y_pred = model.predict(X_test)
# Show the raw prediction array.
y_pred

array([ 9.18679541,  9.35353832, 12.08255402,  7.80606333,  8.26848589,
        8.38572827, 11.25446396, 10.5350408 ,  7.82312614,  7.7301682 ,
       10.24287393,  8.46867888,  8.22804065, 12.64422259,  9.65587693,
        8.89702869, 11.98289764, 12.36464909,  8.50572444, 12.86120924,
       12.05641974, 11.11898453, 12.14792419,  8.52943446, 10.43744985,
       12.84460103,  8.65084991])

In [34]:
# Side-by-side table of held-out targets and model predictions.
# Starting from y_test keeps its original row labels as the index.
comparison = pd.DataFrame({"Actual": y_test}).assign(Predicted=y_pred)

comparison.head()



Unnamed: 0,Actual,Predicted
93,9.0,9.186795
66,8.6,9.353538
26,13.0,12.082554
8,6.9,7.806063
30,8.2,8.268486


A single data point — with values for Month, Number_of_Typhoons, ONI, Nino3.4_SST_anomaly, Western_Pacific_SST, Midlevel_Humidity, SeaLevelPressure, MJO_Phase, and Prev_month_typhoons — is passed to the model to predict the Vertical Wind Shear.


In [35]:


# One observation, with values listed in the same column order as X.
sample_values = [1, 1, 0.03, -0.05, 0.36, 54.8, 1009.8, 0, 0]

# Wrap it in a one-row frame carrying X's column names so the model
# receives the same feature labels it was trained with.
sample = pd.DataFrame([sample_values], columns=X.columns)

model.predict(sample)



array([12.35877554])

In [36]:
# For testing: shows the data again so it is easy to inspect.
# NOTE(review): the characters before "RAW DATA OVERVIEW" look like a
# mis-decoded emoji -- confirm the notebook's file encoding.
print("ðŸ“Š RAW DATA OVERVIEW")
display(data.head())

ðŸ“Š RAW DATA OVERVIEW


Unnamed: 0,Month,Number_of_Typhoons,ONI,Nino3.4_SST_anomaly,Western_Pacific_SST,Midlevel_Humidity,SeaLevelPressure,MJO_Phase,Prev_month_typhoons,Vertical_Wind_Shear
0,1,1,0.03,-0.05,0.36,54.8,1009.8,0,0,13.2
1,2,0,0.2,0.18,-0.43,59.5,1009.5,7,1,13.8
2,3,0,0.02,0.0,0.01,49.3,1009.4,6,0,13.0
3,4,0,0.13,0.07,-0.15,57.8,1008.2,0,0,10.7
4,5,0,-0.03,-0.05,-0.41,61.4,1008.1,3,0,11.1


## **Evaluating the Model**

### **Mean Absolute Error**

In [37]:
from sklearn.metrics import mean_absolute_error

# Mean absolute error between held-out targets and predictions.
mae = mean_absolute_error(y_true=y_test, y_pred=y_pred)
print("MAE:", mae)

MAE: 0.5134824676879167


### **Mean Squared Error**

In [38]:
from sklearn.metrics import mean_squared_error

# Mean squared error between held-out targets and predictions.
mse = mean_squared_error(y_true=y_test, y_pred=y_pred)
print("MSE:", mse)


MSE: 0.4481871001037773


### **R-Squared**

In [39]:
from sklearn.metrics import r2_score

# Coefficient of determination on the held-out split.
r2 = r2_score(y_true=y_test, y_pred=y_pred)
r2

0.8927303303580422

### **Adjusted R-Squared**

In [40]:
# Adjusted R-squared penalises R-squared for the number of predictors:
#     1 - (1 - R2) * (n - 1) / (n - k - 1)
# Here n is the number of test rows and k the number of feature columns.
n, k = X_test.shape
adj_r2 = 1 - (1 - r2) * (n - 1) / (n - k - 1)
adj_r2

0.8359405052534763

1. **Model Performance**

The model achieved good accuracy on the test set, with R² ≈ 0.89 and Adjusted R² ≈ 0.84.
This means the features (climate factors) explain a significant portion of the variation in Vertical Wind Shear.

2. **Error Analysis**

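The MAE (≈ 0.51) is the average absolute difference between predicted and actual values, and the MSE (≈ 0.45) is the average squared difference; taking the square root of the MSE gives an RMSE of about 0.67, in the same units as Vertical Wind Shear.
Smaller values mean the model produces predictions close to the real data.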

3. **Trends Observed**

Based on the predictions, the model suggests stable and consistent wind shear patterns, indicating there are no extreme increases or drops for the months predicted.