In [1]:
pip install pandas scikit-learn

Note: you may need to restart the kernel to use updated packages.


In [2]:
import pandas as pd
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.preprocessing import LabelEncoder


df = pd.read_csv("C:\\Users\\PALLAVI\\OneDrive\\Documents\\CleanedDataset.csv")  


label_encoders = {}
for col in ['Reservoir_name', 'SUBDIVISION']:
    le = LabelEncoder()
    df[col] = le.fit_transform(df[col])
    label_encoders[col] = le


X = df.drop(columns=['Level'])  
y = df['Level']                 


X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)


model = RandomForestRegressor(n_estimators=100, random_state=42)
model.fit(X_train, y_train)


y_pred = model.predict(X_test)


mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)

print(f"Mean Squared Error: {mse:.2f}")
print(f"R² Score (Accuracy): {r2 * 100:.2f}%")


Mean Squared Error: 357.84
R² Score (Accuracy): 99.13%


In [14]:
import pandas as pd
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
import warnings

warnings.filterwarnings("ignore")

df = pd.read_csv("C:\\Users\\PALLAVI\\OneDrive\\Documents\\CleanedDataset.csv")

for col in ['Reservoir_name', 'SUBDIVISION']:
    df[col] = LabelEncoder().fit_transform(df[col])

df['Level_Class'] = pd.cut(df['Level'], bins=[-float('inf'), 50, 150, float('inf')], labels=[0, 1, 2])

X = df.drop(columns=['Level', 'Level_Class'])
y = df['Level_Class'].astype(int)

scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.2, random_state=42)

model = LogisticRegression(max_iter=5000)
model.fit(X_train, y_train)

y_pred = model.predict(X_test)

accuracy = accuracy_score(y_test, y_pred)
conf_matrix = confusion_matrix(y_test, y_pred)
class_report = classification_report(y_test, y_pred)

print(f"Accuracy: {accuracy * 100:.2f}%\n")
print("Confusion Matrix:")
print(conf_matrix)
print("\nClassification Report:")
print(class_report)


Accuracy: 86.70%

Confusion Matrix:
[[  0   6   0]
 [  0  40  51]
 [  0  37 573]]

Classification Report:
              precision    recall  f1-score   support

           0       0.00      0.00      0.00         6
           1       0.48      0.44      0.46        91
           2       0.92      0.94      0.93       610

    accuracy                           0.87       707
   macro avg       0.47      0.46      0.46       707
weighted avg       0.85      0.87      0.86       707



In [7]:
import pandas as pd
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report

df = pd.read_csv("C:\\Users\\PALLAVI\\OneDrive\\Documents\\CleanedDataset.csv")

for col in ['Reservoir_name', 'SUBDIVISION']:
    df[col] = LabelEncoder().fit_transform(df[col])

df['Level_Class'] = pd.cut(df['Level'], bins=[-float('inf'), 50, 150, float('inf')], labels=[0, 1, 2])

X = df.drop(columns=['Level', 'Level_Class'])
y = df['Level_Class'].astype(int)

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

model = KNeighborsClassifier(n_neighbors=5)
model.fit(X_train, y_train)

y_pred = model.predict(X_test)

accuracy = accuracy_score(y_test, y_pred)
conf_matrix = confusion_matrix(y_test, y_pred)
class_report = classification_report(y_test, y_pred)

print(f"Accuracy: {accuracy * 100:.2f}%\n")
print("Confusion Matrix:")
print(conf_matrix)
print("\nClassification Report:")
print(class_report)


Accuracy: 90.66%

Confusion Matrix:
[[  1   5   0]
 [  0  56  35]
 [  0  26 584]]

Classification Report:
              precision    recall  f1-score   support

           0       1.00      0.17      0.29         6
           1       0.64      0.62      0.63        91
           2       0.94      0.96      0.95       610

    accuracy                           0.91       707
   macro avg       0.86      0.58      0.62       707
weighted avg       0.91      0.91      0.90       707



In [8]:
import pandas as pd
from sklearn.naive_bayes import GaussianNB
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report

df = pd.read_csv("C:\\Users\\PALLAVI\\OneDrive\\Documents\\CleanedDataset.csv")

for col in ['Reservoir_name', 'SUBDIVISION']:
    df[col] = LabelEncoder().fit_transform(df[col])

df['Level_Class'] = pd.cut(df['Level'], bins=[-float('inf'), 50, 150, float('inf')], labels=[0, 1, 2])

X = df.drop(columns=['Level', 'Level_Class'])
y = df['Level_Class'].astype(int)

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

model = GaussianNB()
model.fit(X_train, y_train)

y_pred = model.predict(X_test)

print(f"Accuracy: {accuracy_score(y_test, y_pred) * 100:.2f}%\n")
print("Confusion Matrix:")
print(confusion_matrix(y_test, y_pred))
print("\nClassification Report:")
print(classification_report(y_test, y_pred))


Accuracy: 85.43%

Confusion Matrix:
[[  6   0   0]
 [  2  88   1]
 [  0 100 510]]

Classification Report:
              precision    recall  f1-score   support

           0       0.75      1.00      0.86         6
           1       0.47      0.97      0.63        91
           2       1.00      0.84      0.91       610

    accuracy                           0.85       707
   macro avg       0.74      0.93      0.80       707
weighted avg       0.93      0.85      0.87       707



In [9]:
import pandas as pd
from sklearn.svm import SVR
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.metrics import mean_squared_error, r2_score

df = pd.read_csv("C:\\Users\\PALLAVI\\OneDrive\\Documents\\CleanedDataset.csv")

for col in ['Reservoir_name', 'SUBDIVISION']:
    df[col] = LabelEncoder().fit_transform(df[col])

X = df.drop(columns=['Level'])
y = df['Level']

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

scaler_X = StandardScaler()
scaler_y = StandardScaler()

X_train_scaled = scaler_X.fit_transform(X_train)
X_test_scaled = scaler_X.transform(X_test)
y_train_scaled = scaler_y.fit_transform(y_train.values.reshape(-1, 1)).ravel()

model = SVR(kernel='rbf')
model.fit(X_train_scaled, y_train_scaled)

y_pred_scaled = model.predict(X_test_scaled)
y_pred = scaler_y.inverse_transform(y_pred_scaled.reshape(-1, 1)).ravel()

mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)

print(f"Mean Squared Error: {mse:.2f}")
print(f"R² Score: {r2:.2f}")


Mean Squared Error: 5187.99
R² Score: 0.87


In [11]:
import pandas as pd
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report

df = pd.read_csv("C:\\Users\\PALLAVI\\OneDrive\\Documents\\CleanedDataset.csv")

for col in ['Reservoir_name', 'SUBDIVISION']:
    df[col] = LabelEncoder().fit_transform(df[col])

df['Level_Class'] = pd.cut(df['Level'], bins=[-float('inf'), 50, 150, float('inf')], labels=[0, 1, 2])

X = df.drop(columns=['Level', 'Level_Class'])
y = df['Level_Class'].astype(int)

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

model = DecisionTreeClassifier(random_state=42)
model.fit(X_train, y_train)

y_pred = model.predict(X_test)

print(f"Accuracy: {accuracy_score(y_test, y_pred) * 100:.2f}%")
print("Confusion Matrix:")
print(confusion_matrix(y_test, y_pred))
print("\nClassification Report:")
print(classification_report(y_test, y_pred))


Accuracy: 99.29%
Confusion Matrix:
[[  2   4   0]
 [  0  91   0]
 [  0   1 609]]

Classification Report:
              precision    recall  f1-score   support

           0       1.00      0.33      0.50         6
           1       0.95      1.00      0.97        91
           2       1.00      1.00      1.00       610

    accuracy                           0.99       707
   macro avg       0.98      0.78      0.82       707
weighted avg       0.99      0.99      0.99       707



In [13]:
import pandas as pd
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
import warnings

warnings.filterwarnings("ignore")

df = pd.read_csv("C:\\Users\\PALLAVI\\OneDrive\\Documents\\CleanedDataset.csv")

for col in ['Reservoir_name', 'SUBDIVISION']:
    df[col] = LabelEncoder().fit_transform(df[col])

df['Level_Class'] = pd.cut(df['Level'], bins=[-float('inf'), 50, 150, float('inf')], labels=[0, 1, 2])

X = df.drop(columns=['Level', 'Level_Class'])
y = df['Level_Class'].astype(int)

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

bad_k = 1
model = KNeighborsClassifier(n_neighbors=bad_k)
model.fit(X_train_scaled, y_train)

y_pred = model.predict(X_test_scaled)

print(f"K = {bad_k}")
print(f"Accuracy: {accuracy_score(y_test, y_pred) * 100:.2f}%")
print("Confusion Matrix:")
print(confusion_matrix(y_test, y_pred))
print("\nClassification Report:")
print(classification_report(y_test, y_pred))


K = 1
Accuracy: 94.06%
Confusion Matrix:
[[  0   6   0]
 [  0  71  20]
 [  0  16 594]]

Classification Report:
              precision    recall  f1-score   support

           0       0.00      0.00      0.00         6
           1       0.76      0.78      0.77        91
           2       0.97      0.97      0.97       610

    accuracy                           0.94       707
   macro avg       0.58      0.58      0.58       707
weighted avg       0.93      0.94      0.94       707

