<a href="https://colab.research.google.com/github/ARCHISHMANx01/flood_prediction/blob/main/Untitled0.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
pip install pandas scikit-learn




In [3]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder

# Load the dataset
df = pd.read_csv('flood_prediction_dataset.csv')

# Convert the target variable 'Flood (Yes/No)' into numerical values
label_encoder = LabelEncoder()
df['Flood (Yes/No)'] = label_encoder.fit_transform(df['Flood (Yes/No)'])  # Yes = 1, No = 0

# Split data into features (X) and target (y)
X = df[['Humidity (%)', 'Temperature (°C)', 'Rainfall (mm)', 'Risk Factor', 'Altitude (m)']]
y = df['Flood (Yes/No)']

# Split the dataset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)


In [4]:
from sklearn.linear_model import LogisticRegression

# Initialize the logistic regression model
classification_model = LogisticRegression(max_iter=1000)

# Train the model on the training data
classification_model.fit(X_train, y_train)


In [5]:
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

# Make predictions on the test data
y_pred = classification_model.predict(X_test)

# Calculate accuracy
accuracy = accuracy_score(y_test, y_pred)

# Generate classification report and confusion matrix
class_report = classification_report(y_test, y_pred)
conf_matrix = confusion_matrix(y_test, y_pred)

# Print evaluation results
print(f"Accuracy: {accuracy * 100:.2f}%")
print("Classification Report:")
print(class_report)
print("Confusion Matrix:")
print(conf_matrix)


Accuracy: 100.00%
Classification Report:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00       200

    accuracy                           1.00       200
   macro avg       1.00      1.00      1.00       200
weighted avg       1.00      1.00      1.00       200

Confusion Matrix:
[[200]]




In [6]:
import numpy as np

# Modify flood conditions to balance Yes/No cases
df['Flood (Yes/No)'] = np.where(
    (df['Rainfall (mm)'] > 80) & (df['Humidity (%)'] > 60) & (df['Risk Factor'] >= 1) & (df['Altitude (m)'] < 300),
    1, 0
)

# Retrain the model on the adjusted data
X = df[['Humidity (%)', 'Temperature (°C)', 'Rainfall (mm)', 'Risk Factor', 'Altitude (m)']]
y = df['Flood (Yes/No)']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

classification_model.fit(X_train, y_train)
y_pred = classification_model.predict(X_test)

# Re-evaluate
accuracy = accuracy_score(y_test, y_pred)
class_report = classification_report(y_test, y_pred)
conf_matrix = confusion_matrix(y_test, y_pred)

print(f"Adjusted Accuracy: {accuracy * 100:.2f}%")
print("Adjusted Classification Report:")
print(class_report)
print("Adjusted Confusion Matrix:")
print(conf_matrix)


Adjusted Accuracy: 92.50%
Adjusted Classification Report:
              precision    recall  f1-score   support

           0       0.93      0.99      0.96       180
           1       0.86      0.30      0.44        20

    accuracy                           0.93       200
   macro avg       0.89      0.65      0.70       200
weighted avg       0.92      0.93      0.91       200

Adjusted Confusion Matrix:
[[179   1]
 [ 14   6]]


In [7]:
# Import necessary libraries
import pandas as pd

# Load the sample test data from a CSV file
# Replace 'sample_test_data.csv' with the actual path to your test data CSV
test_data_df = pd.read_csv('sample_test_data.csv')

# Ensure the test data has the correct column names as used in the model
# For example, columns should be ['Humidity (%)', 'Temperature (°C)', 'Rainfall (mm)', 'Risk Factor', 'Altitude (m)']

# Predict the probabilities for flood (using predict_proba)
flood_probabilities = classification_model.predict_proba(test_data_df)

# Add the probabilities to the DataFrame
test_data_df['Probability of No Flood'] = flood_probabilities[:, 0]
test_data_df['Probability of Flood'] = flood_probabilities[:, 1]

# Print the test data with probabilities
print(test_data_df)

# Optionally, save the results to a new CSV file
test_data_df.to_csv('test_data_with_flood_probabilities.csv', index=False)



   Humidity (%)  Temperature (°C)  Rainfall (mm)  Risk Factor  Altitude (m)  \
0            85                30            120            2            50   
1            60                25             50            1           200   
2            90                35            180            2            30   
3            45                20             20            0           500   
4            75                28            100            1           150   
5            80                33            140            2            75   
6            70                26             60            1           220   
7            95                32            170            2            40   
8            65                24             90            0           180   
9            50                22             30            1           300   

   Probability of No Flood  Probability of Flood  
0                 0.153715              0.846285  
1                 0.992148  