In [2]:
import pandas as pd
import joblib
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor

# Load dataset
file_path = r"C:\Users\incha\Downloads\Cyclone.csv"  # Update path if needed
df = pd.read_csv(file_path)

# Drop unnecessary columns
df = df.drop(columns=["index", "FID", "BTID", "NAME", "BASIN"])

# Encode categorical target variable
le = LabelEncoder()
df["CAT"] = le.fit_transform(df["CAT"])

# Define features and targets
X = df[["LAT", "LONG", "WIND_KTS", "PRESSURE", "YEAR"]]
y_cat = df["CAT"]
y_shape = df["Shape_Leng"]

# Split dataset
X_train, X_test, y_cat_train, y_cat_test, y_shape_train, y_shape_test = train_test_split(
    X, y_cat, y_shape, test_size=0.15, random_state=42
)

# Scale numerical features
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Train models
clf = RandomForestClassifier(n_estimators=500, random_state=42, n_jobs=-1)
clf.fit(X_train_scaled, y_cat_train)

reg_shape = RandomForestRegressor(n_estimators=500, random_state=42, n_jobs=-1)
reg_shape.fit(X_train_scaled, y_shape_train)

# Save models
joblib.dump(clf, "cyclone_clf.pkl")
joblib.dump(reg_shape, "cyclone_reg.pkl")
joblib.dump(scaler, "scaler1.pkl")
joblib.dump(le, "label_encoder.pkl")

print("✅ Models trained and saved successfully!")


✅ Models trained and saved successfully!
