In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score, classification_report

# Load the rainfall records.
# NOTE(review): this is a Colab-style absolute path; adjust it when running elsewhere.
file_path = '/content/RainDataSet(1970-2016).csv'
data = pd.read_csv(file_path)

# Rainfall (in mm) strictly above this value is labelled as a flood.
flood_threshold = 200

# Binary target column: 1 when rainfall exceeds the threshold, otherwise 0.
data['Flood'] = (data['Rainfall'] > flood_threshold).astype(int)

# A missing Rainfall value compares as False against the threshold, so such
# rows would be silently labelled "no flood" above. Keep them out of the
# modelling data so the classifiers never learn from labels that were
# fabricated from missing measurements. `data` itself keeps every row.
has_rainfall = data['Rainfall'].notna()

# Features are calendar/station identifiers only; Rainfall itself is not a
# feature because the target is derived directly from it.
X = data.loc[has_rainfall, ['Year', 'Month', 'StationIndex']]
y = data.loc[has_rainfall, 'Flood']

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Fit a single decision tree on the training split.
# `fit` returns the estimator itself, so construction and training are chained.
decision_tree = DecisionTreeClassifier(random_state=42).fit(X_train, y_train)

# Fit a 100-tree random forest on the same training split, with the same seed.
random_forest = RandomForestClassifier(
    n_estimators=100,
    random_state=42,
).fit(X_train, y_train)

# Score the decision tree on the held-out test split.
dt_predictions = decision_tree.predict(X_test)
dt_accuracy = accuracy_score(y_test, dt_predictions)
dt_report = classification_report(y_test, dt_predictions)

# Score the random forest on the same held-out test split.
rf_predictions = random_forest.predict(X_test)
rf_accuracy = accuracy_score(y_test, rf_predictions)
rf_report = classification_report(y_test, rf_predictions)

# Show both accuracies first, then the per-class breakdown for each model.
print("Decision Tree Accuracy:", dt_accuracy)
print("Random Forest Accuracy:", rf_accuracy)
print("Decision Tree Classification Report:\n", dt_report)
print("Random Forest Classification Report:\n", rf_report)

# --- Prediction Code ---
# Collect the details of a single observation from the user. Non-numeric
# input raises ValueError from int()/float(); an out-of-range month is
# rejected the same way so the model is never queried with an impossible date.
print("\n--- Flood Prediction ---")
user_year = int(input("Enter Year: "))
# The station name is requested for the user's reference only; it is not a model feature.
user_station = input("Enter Station Name (optional, not used in features): ")
user_month = int(input("Enter Month (1-12): "))
if not 1 <= user_month <= 12:
    raise ValueError(f"Month must be between 1 and 12, got {user_month}.")
# NOTE: the rainfall figure is only echoed back in the result message below;
# the classifier predicts from Year, Month and StationIndex alone.
user_rainfall = float(input("Enter Rainfall (in mm): "))
user_station_index = int(input("Enter Station Index: "))

# Single-row frame whose columns match the training features in name and order.
user_input = pd.DataFrame({
    'Year': [user_year],
    'Month': [user_month],
    'StationIndex': [user_station_index]
})

# Predict using the Random Forest model; predict() returns an array, so take its only element.
flood_prediction = random_forest.predict(user_input)[0]

# Report the predicted class (1 = flood, 0 = no flood).
if flood_prediction == 1:
    print(f"\nFlood is likely to occur (Rainfall: {user_rainfall} mm).")
else:
    print(f"\nFlood is not likely to occur (Rainfall: {user_rainfall} mm).")