#  Use case 2: Predicting whether immediate cleanup is required, based on trash level, trash weight, and number of visitors

# Model 1: Decision Tree

In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

# Read the cleaned dataset from disk.
ocean = pd.read_csv('cleaned_ocean.csv')

# Three numeric predictors and the target column.
X = ocean[['Pounds', 'Total Items Collected', 'People']]
y = ocean['Immediate Cleanup Required']

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Fit the scaler on the training rows only, then reuse it for the test rows.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Decision tree constrained in depth and in split/leaf sizes.
dt_model = DecisionTreeClassifier(
    max_depth=5,
    min_samples_split=10,
    min_samples_leaf=4,
    random_state=42,
)
dt_model.fit(X_train_scaled, y_train)

# Score the held-out rows.
y_pred = dt_model.predict(X_test_scaled)

accuracy_regularized = accuracy_score(y_test, y_pred)
conf_matrix_regularized = confusion_matrix(y_test, y_pred)
classification_rep_regularized = classification_report(y_test, y_pred)

# NOTE(review): the recorded run reports 1.00 accuracy with an empty
# off-diagonal; worth confirming the target is not derived directly from
# these three features.
print(f"Accuracy (Regularized): {accuracy_regularized:.2f}")
for title, body in (
    ("Confusion Matrix (Regularized):\n", conf_matrix_regularized),
    ("Classification Report (Regularized):\n", classification_rep_regularized),
):
    print(title, body)


Accuracy (Regularized): 1.00
Confusion Matrix (Regularized):
 [[3783    0]
 [   0 3798]]
Classification Report (Regularized):
               precision    recall  f1-score   support

           0       1.00      1.00      1.00      3783
           1       1.00      1.00      1.00      3798

    accuracy                           1.00      7581
   macro avg       1.00      1.00      1.00      7581
weighted avg       1.00      1.00      1.00      7581



In [2]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

# Read the cleaned dataset from disk again so this cell runs on its own.
ocean = pd.read_csv('cleaned_ocean.csv')

# Same predictors and target as the previous cell.
X = ocean[['Pounds', 'Total Items Collected', 'People']]
y = ocean['Immediate Cleanup Required']

# 80/20 split with a fixed seed.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Scaler statistics come from the training rows only.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# This rebinds dt_model to a tree with default growth settings, replacing
# the depth-limited tree built in the previous cell.
dt_model = DecisionTreeClassifier(random_state=42)
dt_model.fit(X_train_scaled, y_train)

# Predictions for the held-out rows.
y_pred = dt_model.predict(X_test_scaled)

def predict_cleanup_requirement(pounds, total_items_collected, people):
    """Classify one observation with the notebook-level decision tree.

    The three values are placed in a one-row DataFrame, transformed with the
    module-level ``scaler`` and passed to the module-level ``dt_model``.

    Args:
        pounds: Value for the 'Pounds' feature.
        total_items_collected: Value for the 'Total Items Collected' feature.
        people: Value for the 'People' feature.

    Returns:
        "No immediate cleanup required" when the model predicts 0, otherwise
        "Immediate cleanup required".
    """
    row = {
        'Pounds': [pounds],
        'Total Items Collected': [total_items_collected],
        'People': [people],
    }

    # Apply the same scaling the model was trained with.
    scaled_row = scaler.transform(pd.DataFrame(row))
    predicted = dt_model.predict(scaled_row)

    # `predicted` holds the result for the single input row.
    return "No immediate cleanup required" if predicted == 0 else "Immediate cleanup required"

# Try the helper on one sample observation
example_pounds, example_total_items, example_people = 100, 50, 10

prediction = predict_cleanup_requirement(
    example_pounds,
    example_total_items,
    example_people,
)
print(prediction)


Immediate cleanup required


# Model 2: Random Forest

In [3]:
# Initialize the model
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix,precision_score,recall_score,f1_score

# Random forest with 100 trees; the fixed seed keeps results repeatable.
rf_model = RandomForestClassifier(n_estimators=100, random_state=42)

# Train the model on the scaled training features
rf_model.fit(X_train_scaled, y_train)

# Make predictions on the test set with the random forest itself.
# This cell previously called dt_model.predict (and refit dt_model), so every
# metric reported below described the decision tree, not the forest.
y_pred = rf_model.predict(X_test_scaled)

# Evaluate the model
accuracy = accuracy_score(y_test, y_pred)
conf_matrix = confusion_matrix(y_test, y_pred)
classification_rep = classification_report(y_test, y_pred)

# Training accuracy must come from the same model that is being reported.
training_accuracy = accuracy_score(y_train, rf_model.predict(X_train_scaled))
validation_accuracy = accuracy_score(y_test, y_pred)

# Support-weighted averages across the classes.
precision = precision_score(y_test, y_pred, average='weighted')
recall = recall_score(y_test, y_pred, average='weighted')
f1 = f1_score(y_test, y_pred, average='weighted')

print(f"Training Accuracy: {training_accuracy:.2f}")
print(f"Test Accuracy: {validation_accuracy:.2f}")
print(f"Precision: {precision:.2f}")
print(f"Recall: {recall:.2f}")
print(f"F1 Score: {f1:.2f}")
print("Confusion Matrix of binary class classification problem:\n", conf_matrix)

Training Accuracy: 1.00
Test Accuracy: 1.00
Precision: 1.00
Recall: 1.00
F1 Score: 1.00
Confusion Matrix of binary class classification problem:
 [[3783    0]
 [   0 3798]]


In [4]:
def predict_cleanup_requirement(pounds, total_items_collected, people, model, scaler):
    """Classify one observation with the supplied model and scaler.

    Args:
        pounds: Value for the 'Pounds' feature.
        total_items_collected: Value for the 'Total Items Collected' feature.
        people: Value for the 'People' feature.
        model: Object exposing ``predict``; its first returned element is
            used as an index (0 or 1) into the label list.
        scaler: Object exposing ``transform``; applied to the one-row frame
            before prediction.

    Returns:
        "No immediate cleanup required" for label 0,
        "Immediate cleanup required" for label 1.
    """
    features = pd.DataFrame(
        {
            'Pounds': [pounds],
            'Total Items Collected': [total_items_collected],
            'People': [people],
        }
    )

    # Scale, predict, and keep the label for the single row.
    label = model.predict(scaler.transform(features))[0]

    # List position doubles as the class label.
    readable = ["No immediate cleanup required", "Immediate cleanup required"]
    return readable[label]

# Example usage of the function with the random forest trained in this section
example_pounds = 100
example_total_items = 50
example_people = 10

# Predict with rf_model: this section is about the random forest, and
# dt_model was passed here before, which only re-ran the decision tree.
prediction = predict_cleanup_requirement(example_pounds, example_total_items, example_people, rf_model, scaler)
print(prediction)

Immediate cleanup required


## Model 3. Logistic Regression

In [5]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
from sklearn.metrics import precision_score, recall_score, f1_score


# Logistic regression fitted on the standardized training features
lr_model = LogisticRegression(random_state=42)
lr_model.fit(X_train_scaled, y_train)

# Predictions for the held-out rows
y_pred_lr = lr_model.predict(X_test_scaled)

# Accuracy on both splits; accuracy_lr and validation_accuracy_lr are both
# computed from the same test predictions.
training_accuracy_lr = accuracy_score(y_train, lr_model.predict(X_train_scaled))
accuracy_lr = accuracy_score(y_test, y_pred_lr)
validation_accuracy_lr = accuracy_score(y_test, y_pred_lr)

# Support-weighted averages across the classes
precision_lr = precision_score(y_test, y_pred_lr, average='weighted')
recall_lr = recall_score(y_test, y_pred_lr, average='weighted')
f1_lr = f1_score(y_test, y_pred_lr, average='weighted')

# Per-class breakdowns
conf_matrix_lr = confusion_matrix(y_test, y_pred_lr)
classification_rep_lr = classification_report(y_test, y_pred_lr)

# Report
print(f"Training Accuracy (Logistic Regression): {training_accuracy_lr:.2f}")
print(f"Test Accuracy (Logistic Regression): {validation_accuracy_lr:.2f}")
print(f"Precision (Logistic Regression): {precision_lr:.2f}")
print(f"Recall (Logistic Regression): {recall_lr:.2f}")
print(f"F1 Score (Logistic Regression): {f1_lr:.2f}")
print("Confusion Matrix of binary class classification problem (Logistic Regression):\n", conf_matrix_lr)
print("Classification Report (Logistic Regression):\n", classification_rep_lr)

Training Accuracy (Logistic Regression): 0.86
Test Accuracy (Logistic Regression): 0.87
Precision (Logistic Regression): 0.88
Recall (Logistic Regression): 0.87
F1 Score (Logistic Regression): 0.87
Confusion Matrix of binary class classification problem (Logistic Regression):
 [[3620  163]
 [ 841 2957]]
Classification Report (Logistic Regression):
               precision    recall  f1-score   support

           0       0.81      0.96      0.88      3783
           1       0.95      0.78      0.85      3798

    accuracy                           0.87      7581
   macro avg       0.88      0.87      0.87      7581
weighted avg       0.88      0.87      0.87      7581



In [6]:
def predict_cleanup_requirement(pounds, total_items_collected, people, model, scaler):
    """Return a human-readable cleanup verdict for a single observation.

    Args:
        pounds: Value for the 'Pounds' feature.
        total_items_collected: Value for the 'Total Items Collected' feature.
        people: Value for the 'People' feature.
        model: Object with a ``predict`` method; the first element of its
            result selects the returned message (0 or 1).
        scaler: Object with a ``transform`` method, applied before prediction.

    Returns:
        "No immediate cleanup required" for label 0,
        "Immediate cleanup required" for label 1.
    """
    # One-row frame whose column names match the training features.
    columns = {}
    columns['Pounds'] = [pounds]
    columns['Total Items Collected'] = [total_items_collected]
    columns['People'] = [people]
    single_row = pd.DataFrame(columns)

    scaled = scaler.transform(single_row)
    predictions = model.predict(scaled)
    first_label = predictions[0]

    messages = ["No immediate cleanup required", "Immediate cleanup required"]
    return messages[first_label]

# Try the helper with the logistic regression model on one sample observation
example_pounds, example_total_items, example_people = 100, 50, 10

prediction = predict_cleanup_requirement(
    example_pounds,
    example_total_items,
    example_people,
    lr_model,
    scaler,
)
print(prediction)

No immediate cleanup required


## Model 4: KNN

In [7]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
from sklearn.metrics import precision_score, recall_score, f1_score


# Initialize the K-Nearest Neighbors model
knn_model = KNeighborsClassifier(n_neighbors=3)  # You can adjust the number of neighbors as needed

# Train the model on the scaled training features
knn_model.fit(X_train_scaled, y_train)

# Make predictions on the test set
y_pred_knn = knn_model.predict(X_test_scaled)

# Evaluate the model
accuracy_knn = accuracy_score(y_test, y_pred_knn)
conf_matrix_knn = confusion_matrix(y_test, y_pred_knn)
classification_rep_knn = classification_report(y_test, y_pred_knn)
training_accuracy_knn = accuracy_score(y_train, knn_model.predict(X_train_scaled))
validation_accuracy_knn = accuracy_score(y_test, y_pred_knn)

# Support-weighted averages across the classes
precision_knn = precision_score(y_test, y_pred_knn, average='weighted')
recall_knn = recall_score(y_test, y_pred_knn, average='weighted')
f1_knn = f1_score(y_test, y_pred_knn, average='weighted')

print(f"Training Accuracy (KNN): {training_accuracy_knn:.2f}")
print(f"Test Accuracy (KNN): {validation_accuracy_knn:.2f}")
print(f"Precision (KNN): {precision_knn:.2f}")
print(f"Recall (KNN): {recall_knn:.2f}")
print(f"F1 Score (KNN): {f1_knn:.2f}")
print("Confusion Matrix of binary class classification problem (KNN):\n", conf_matrix_knn)
# The report belongs to the KNN model; the label previously read
# "Logistic Regression" (copy-paste leftover).
print("Classification Report (KNN):\n", classification_rep_knn)

  mode, _ = stats.mode(_y[neigh_ind, k], axis=1)
  mode, _ = stats.mode(_y[neigh_ind, k], axis=1)


Training Accuracy (KNN): 0.99
Test Accuracy (KNN): 0.98
Precision (KNN): 0.98
Recall (KNN): 0.98
F1 Score (KNN): 0.98
Confusion Matrix of binary class classification problem (KNN):
 [[3731   52]
 [  69 3729]]
Classification Report (Logistic Regression):
               precision    recall  f1-score   support

           0       0.98      0.99      0.98      3783
           1       0.99      0.98      0.98      3798

    accuracy                           0.98      7581
   macro avg       0.98      0.98      0.98      7581
weighted avg       0.98      0.98      0.98      7581



In [8]:
def predict_cleanup_requirement_knn(pounds, total_items_collected, people, model, scaler):
    """Classify one observation and return the verdict as text.

    Args:
        pounds: Value for the 'Pounds' feature.
        total_items_collected: Value for the 'Total Items Collected' feature.
        people: Value for the 'People' feature.
        model: Object with a ``predict`` method; the first element of its
            result is used as an index (0 or 1) into the label list.
        scaler: Object with a ``transform`` method, applied before prediction.

    Returns:
        "No immediate cleanup required" for label 0,
        "Immediate cleanup required" for label 1.
    """
    observation = pd.DataFrame({
        'Pounds': [pounds],
        'Total Items Collected': [total_items_collected],
        'People': [people],
    })

    # Scale first, then predict; only the single row's label is needed.
    observation_scaled = scaler.transform(observation)
    label = model.predict(observation_scaled)[0]

    verdicts = ["No immediate cleanup required", "Immediate cleanup required"]
    return verdicts[label]

# Try the helper with the KNN model on one sample observation
example_pounds_knn, example_total_items_knn, example_people_knn = 100, 50, 10

prediction_knn = predict_cleanup_requirement_knn(
    example_pounds_knn,
    example_total_items_knn,
    example_people_knn,
    knn_model,
    scaler,
)
print(prediction_knn)


Immediate cleanup required


  mode, _ = stats.mode(_y[neigh_ind, k], axis=1)


# Model 5: SVM

In [9]:
# SVM

import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

# Read the cleaned dataset from disk so this cell runs on its own.
ocean = pd.read_csv('cleaned_ocean.csv')

# Predictors and target.
X = ocean[['Pounds', 'Total Items Collected', 'People']]
y = ocean['Immediate Cleanup Required']

# 80/20 split with a fixed seed.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Scaler statistics come from the training rows only.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Support vector classifier with library-default settings apart from the seed.
svm_model = SVC(random_state=42)
svm_model.fit(X_train_scaled, y_train)

# Score the held-out rows.
y_pred_svm = svm_model.predict(X_test_scaled)

accuracy_svm = accuracy_score(y_test, y_pred_svm)
conf_matrix_svm = confusion_matrix(y_test, y_pred_svm)
classification_rep_svm = classification_report(y_test, y_pred_svm)

# Report
print(f"Accuracy (SVM): {accuracy_svm:.2f}")
for title, body in (
    ("Confusion Matrix (SVM):\n", conf_matrix_svm),
    ("Classification Report (SVM):\n", classification_rep_svm),
):
    print(title, body)


Accuracy (SVM): 0.88
Confusion Matrix (SVM):
 [[3570  213]
 [ 692 3106]]
Classification Report (SVM):
               precision    recall  f1-score   support

           0       0.84      0.94      0.89      3783
           1       0.94      0.82      0.87      3798

    accuracy                           0.88      7581
   macro avg       0.89      0.88      0.88      7581
weighted avg       0.89      0.88      0.88      7581



In [10]:


def predict_cleanup_requirement_svm(pounds, total_items_collected, people):
    """Classify one observation with the notebook-level SVM.

    The values are placed in a one-row DataFrame, transformed with the
    module-level ``scaler`` and passed to the module-level ``svm_model``.

    Args:
        pounds: Value for the 'Pounds' feature.
        total_items_collected: Value for the 'Total Items Collected' feature.
        people: Value for the 'People' feature.

    Returns:
        "No immediate cleanup required" when the model predicts 0, otherwise
        "Immediate cleanup required".
    """
    observation = pd.DataFrame(
        {
            'Pounds': [pounds],
            'Total Items Collected': [total_items_collected],
            'People': [people],
        }
    )

    # Apply the same scaling the SVM was trained with.
    observation_scaled = scaler.transform(observation)

    # Guard-clause form: the negative class returns early.
    if svm_model.predict(observation_scaled) == 0:
        return "No immediate cleanup required"
    return "Immediate cleanup required"

# Sample observation for the SVM helper
example_pounds = 3        # value for 'Pounds'
example_total_items = 57  # value for 'Total Items Collected'
example_people = 2        # value for 'People'

prediction_svm = predict_cleanup_requirement_svm(
    example_pounds,
    example_total_items,
    example_people,
)
print(prediction_svm)


No immediate cleanup required
