In [3]:
import pandas as pd
import numpy as np
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split, GridSearchCV, StratifiedKFold
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier, VotingClassifier
from sklearn.metrics import accuracy_score, recall_score, roc_auc_score
from sklearn.feature_selection import SelectFromModel

# Read the CSV file
df = pd.read_csv("C:/Users/links/OneDrive/Desktop/convert1.csv")

# Handle missing values
numeric_columns = df.select_dtypes(include=['number']).columns
df[numeric_columns] = df[numeric_columns].fillna(df[numeric_columns].median())

# Define features and target
feature_columns = ['HOUR1', 'HOUR2', 'HOUR3', 'HOUR4', 'HOUR5', 'HOUR6', 'HOUR7', 'HOUR8',
                   'HOUR9', 'HOUR10', 'HOUR11', 'HOUR12', 'HOUR13', 'HOUR14', 'HOUR15',
                   'HOUR16', 'HOUR17', 'HOUR18', 'HOUR19', 'HOUR20', 'HOUR21', 'HOUR22', 
                   'HOUR23', 'HOUR24']

X = df[feature_columns]  
y = df['Flood_Occurence']

# Train-test split
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, stratify=y, random_state=42
)

# Impute and scale the data
imputer = SimpleImputer(strategy='mean')
scaler = StandardScaler()

X_train_imputed = imputer.fit_transform(X_train)
X_test_imputed = imputer.transform(X_test)

X_train_scaled = scaler.fit_transform(X_train_imputed)
X_test_scaled = scaler.transform(X_test_imputed)

# Feature selection using RandomForestClassifier
selector = SelectFromModel(RandomForestClassifier(n_estimators=100, random_state=42), threshold='mean')
X_train_selected = selector.fit_transform(X_train_scaled, y_train)
X_test_selected = selector.transform(X_test_scaled)

# Initialize base models
rf = RandomForestClassifier(random_state=42, n_estimators=200, max_depth=15, min_samples_split=10, min_samples_leaf=5)
gbc = GradientBoostingClassifier(random_state=42, n_estimators=150, learning_rate=0.05, max_depth=4, min_samples_split=10, min_samples_leaf=5, subsample=0.8)

# Combine them using a Voting Classifier
voting_clf = VotingClassifier(estimators=[('rf', rf), ('gbc', gbc)], voting='soft')

# Fit the ensemble model
voting_clf.fit(X_train_selected, y_train)

# Make predictions
y_pred_voting = voting_clf.predict(X_test_selected)

# Evaluate the ensemble model
#print("\nVoting Classifier:")
#print("Accuracy score: %.2f%%" % (accuracy_score(y_test, y_pred_voting) * 100))
# print("Recall score: %.2f%%" % (recall_score(y_test, y_pred_voting) * 100))

#if len(np.unique(y_test)) > 1:
#    print("ROC AUC score: %.2f%%" % (roc_auc_score(y_test, y_pred_voting) * 100))
#else:
 #   print("ROC AUC score: Not applicable (only one class present in y_test).")


In [None]:
import pandas as pd

# Assuming imputer, scaler, selector, and voting_clf are defined and trained in your environment

test = pd.DataFrame()

while True:
    try:
        # Collect input for all 24 hours
        test['HOUR1'] = [input('Enter value for HOUR1: ')]
        test['HOUR2'] = [input('Enter value for HOUR2: ')]
        test['HOUR3'] = [input('Enter value for HOUR3: ')]
        test['HOUR4'] = [input('Enter value for HOUR4: ')]
        test['HOUR5'] = [input('Enter value for HOUR5: ')]
        test['HOUR6'] = [input('Enter value for HOUR6: ')]
        test['HOUR7'] = [input('Enter value for HOUR7: ')]
        test['HOUR8'] = [input('Enter value for HOUR8: ')]
        test['HOUR9'] = [input('Enter value for HOUR9: ')]
        test['HOUR10'] = [input('Enter value for HOUR10: ')]
        test['HOUR11'] = [input('Enter value for HOUR11: ')]
        test['HOUR12'] = [input('Enter value for HOUR12: ')]
        test['HOUR13'] = [input('Enter value for HOUR13: ')]
        test['HOUR14'] = [input('Enter value for HOUR14: ')]
        test['HOUR15'] = [input('Enter value for HOUR15: ')]
        test['HOUR16'] = [input('Enter value for HOUR16: ')]
        test['HOUR17'] = [input('Enter value for HOUR17: ')]
        test['HOUR18'] = [input('Enter value for HOUR18: ')]
        test['HOUR19'] = [input('Enter value for HOUR19: ')]
        test['HOUR20'] = [input('Enter value for HOUR20: ')]
        test['HOUR21'] = [input('Enter value for HOUR21: ')]
        test['HOUR22'] = [input('Enter value for HOUR22: ')]
        test['HOUR23'] = [input('Enter value for HOUR23: ')]
        test['HOUR24'] = [input('Enter value for HOUR24: ')]

        # Impute and scale the new input
        test_imputed = imputer.transform(test)
        test_scaled = scaler.transform(test_imputed)
        test_selected = selector.transform(test_scaled)

        # Predict using the trained model
        probabilities = voting_clf.predict_proba(test_selected)  # Get the probabilities for each class

        # Probability of flood occurrence (class 1)
        flood_probability = probabilities[0][1] * 100

        print(f"Flood Occurrence Probability: {flood_probability:.2f}%")
    except ValueError:
        print('Data not valid')
    except NameError as ne:
        print(f"NameError: {ne}")
    except Exception as e:
        print(f"An error occurred: {e}")
    else:
        break


Enter value for HOUR1:  0
Enter value for HOUR2:  0
Enter value for HOUR3:  0
Enter value for HOUR4:  4
Enter value for HOUR5:  8
Enter value for HOUR6:  9
Enter value for HOUR7:  8
Enter value for HOUR8:  10
Enter value for HOUR9:  14
Enter value for HOUR10:  16
Enter value for HOUR11:  20
Enter value for HOUR12:  28
Enter value for HOUR13:  39
Enter value for HOUR14:  33
Enter value for HOUR15:  20
Enter value for HOUR16:  13
Enter value for HOUR17:  7
Enter value for HOUR18:  2
Enter value for HOUR19:  0.1
Enter value for HOUR20:  0
Enter value for HOUR21:  0
Enter value for HOUR22:  0
Enter value for HOUR23:  0
Enter value for HOUR24:  0


NameError: name 'imputer' is not defined
