In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report
from sklearn.metrics import confusion_matrix
from imblearn.over_sampling import RandomOverSampler

In [2]:
# Read the storm CSV into a DataFrame. The path is relative, so it is
# resolved against the kernel's current working directory.
storms_csv = '2001-2022storms.csv'
df = pd.read_csv(storms_csv)

# A bare trailing expression lets the notebook render the frame as a table.
df

Unnamed: 0,ID,NAME,YEAR,MONTH,DAY,HOUR,LAT,LONG,STATUS,CATEGORY,WIND,PRESSURE,TROPICALSTORM_FORCE_DIAMETER,HURRICANE_FORCE_DIAMETER
0,ALLISON200106,Allison,2001,6,5,12,27.5,-95.0,Tropical Storm,,40,1007,,
1,ALLISON200106,Allison,2001,6,5,18,28.5,-95.3,Tropical Storm,,50,1002,,
2,ALLISON200106,Allison,2001,6,5,21,28.9,-95.3,Tropical Storm,,45,1003,,
3,ALLISON200106,Allison,2001,6,6,0,29.3,-95.3,Tropical Storm,,45,1003,,
4,ALLISON200106,Allison,2001,6,6,6,30.1,-95.2,Tropical Depression,,30,1006,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
11582,NICOLE202211,Nicole,2022,11,10,19,29.2,-83.0,Tropical Storm,,40,989,300.0,0.0
11583,NICOLE202211,Nicole,2022,11,11,0,30.1,-84.0,Tropical Storm,,35,992,300.0,0.0
11584,NICOLE202211,Nicole,2022,11,11,6,31.2,-84.6,Tropical Depression,,30,996,0.0,0.0
11585,NICOLE202211,Nicole,2022,11,11,12,33.2,-84.6,Tropical Depression,,25,999,0.0,0.0


In [3]:
# Rows without a CATEGORY value are labelled 0. The column is written back
# into `df`, so every later read of `df` sees the filled values.
# NOTE(review): presumably NaN marks observations below hurricane strength
# (the previewed NaN rows are tropical storms/depressions) — confirm.
df["CATEGORY"] = df["CATEGORY"].fillna(0)

# Name the model inputs explicitly rather than dropping the unwanted columns:
# a stray column in the CSV (for example a saved index such as 'Unnamed: 0')
# can then never slip into the feature matrix, and any frame passed to the
# model at prediction time must carry exactly these columns in this order.
# NOTE(review): CATEGORY looks like it may be derived from WIND — if so, the
# reported scores mostly reflect that rule; verify before relying on them.
FEATURE_COLUMNS = ['YEAR', 'MONTH', 'DAY', 'HOUR', 'LAT', 'LONG', 'WIND', 'PRESSURE']
X = df[FEATURE_COLUMNS]

# Target: the (filled) storm category.
y = df['CATEGORY']

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split
# reproducible from run to run.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Rebalance the classes with random oversampling. Only the training split is
# passed to the sampler, so the held-out rows keep their original class mix.
oversampler = RandomOverSampler(random_state=42)
X_train_resampled, y_train_resampled = oversampler.fit_resample(
    X_train,
    y_train,
)

# Build a 100-tree random forest with a fixed seed and fit it on the
# oversampled training data in one expression (`fit` returns the estimator).
model = RandomForestClassifier(n_estimators=100, random_state=42).fit(
    X_train_resampled,
    y_train_resampled,
)

# Predict a category for every held-out row.
y_pred = model.predict(X_test)

# Overall fraction of held-out rows that were classified correctly.
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)
print(f'Accuracy: {accuracy:.2f}')

# Per-class precision / recall / F1 rendered as a text table.
report_text = classification_report(y_true=y_test, y_pred=y_pred)
print('Classification Report:\n', report_text)

Accuracy: 0.99
Classification Report:
               precision    recall  f1-score   support

         0.0       1.00      1.00      1.00      1780
         1.0       0.97      0.98      0.98       262
         2.0       1.00      1.00      1.00        99
         3.0       1.00      1.00      1.00        81
         4.0       0.95      1.00      0.97        74
         5.0       1.00      0.82      0.90        22

    accuracy                           0.99      2318
   macro avg       0.99      0.97      0.97      2318
weighted avg       0.99      0.99      0.99      2318



In [4]:

# Cross-tabulate true against predicted labels for the held-out rows
# (scikit-learn puts true classes on rows and predicted classes on columns).
conf_matrix = confusion_matrix(y_true=y_test, y_pred=y_pred)
print('Confusion Matrix:\n', conf_matrix)


Confusion Matrix:
 [[1773    7    0    0    0    0]
 [   6  256    0    0    0    0]
 [   0    0   99    0    0    0]
 [   0    0    0   81    0    0]
 [   0    0    0    0   74    0]
 [   0    0    0    0    4   18]]


In [5]:
# One hypothetical observation written as a single record; each key becomes
# a column of the one-row frame handed to the model in the following cells.
storm_record = {
    'YEAR': 2023,
    'MONTH': 8,
    'DAY': 15,
    'HOUR': 12,
    'LAT': 25.0,
    'LONG': -80.0,
    'WIND': 70,
    'PRESSURE': 1005,
}
new_storm = pd.DataFrame([storm_record])

In [6]:
# Hard class prediction for the hypothetical storm (one label per input row).
new_predictions = model.predict(X=new_storm)

In [7]:
# Class-membership probabilities for the hypothetical storm: one row per
# input row, one column per entry of `model.classes_`.
probabilities = model.predict_proba(X=new_storm)
print(probabilities)

[[0.26 0.74 0.   0.   0.   0.  ]]


In [8]:
# `argmax` gives the *column position* of the most probable class in the
# probability matrix, not the class label. Translate positions into labels
# through `model.classes_` so the printed value is a real category even when
# the labels are not exactly 0..n-1 (e.g. a category absent from training).
best_columns = probabilities.argmax(axis=1)
predicted_classes = model.classes_[best_columns]

print("Predicted Category for New Data:")
print(predicted_classes)

Predicted Category for New Data:
[1]
