#  Use case 3:Forecasting Predominant Waste Types with Latitude and Longitude

# Model:Decision Tree

In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
import warnings
warnings.filterwarnings('ignore')

# Load the dataset
file_path = 'cleaned_ocean.csv'
data = pd.read_csv(file_path)

# Columns to exclude that are not trash items
excluded_columns = ['Total Items Collected', 'Adults', 'Children', 'People', 'Pounds', 'Miles','Year','Day','Month','Cleanup ID','Latitude','Longitude','Group Name','Zone','State']

# Identify columns related to trash items collected based on data type and excluding the non-trash item columns
trash_item_columns = [col for col in data.columns if data[col].dtype in ['int64', 'float64'] and col not in excluded_columns]

# Determine the most common trash item for each cleanup event
data['Most_Common_Trash'] = data[trash_item_columns].idxmax(axis=1)

# Prepare the features and target
X = data[['Latitude', 'Longitude']]  # Features
y = data['Most_Common_Trash']  # Target

# Splitting the dataset into training and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Initialize the Decision Tree Classifier
dt_classifier = DecisionTreeClassifier(random_state=42,
    max_depth=10,  # Allow the tree to be deeper
    min_samples_split=20,  # Require fewer samples to split a node
    min_samples_leaf=10,  # Require fewer samples at a leaf node
    max_features='auto',  # Limit the number of features to consider for the best split
    max_leaf_nodes=50 ) # Allow more leaf nodes)



# Train the classifier
dt_classifier.fit(X_train, y_train)

# Predict on the test set
y_pred = dt_classifier.predict(X_test)

# Calculate the accuracy
accuracy = accuracy_score(y_test, y_pred)

# Precision, Recall, and F1 Score, expressed as a percentage
precision = precision_score(y_test, y_pred, average='weighted') * 100  
recall = recall_score(y_test, y_pred, average='weighted') * 100
f1 = f1_score(y_test, y_pred, average='weighted') * 100

# Print out the results
print(f"Accuracy of the classifier: {accuracy:.2%}")
training_accuracy = accuracy_score(y_train, dt_classifier.predict(X_train))
validation_accuracy = accuracy_score(y_test, y_pred)
print(f"Training Accuracy: {training_accuracy:.2%}")
print(f"Validation Accuracy: {validation_accuracy:.2%}")

print(f"Precision: {precision:.2f}%")
print(f"Recall: {recall:.2f}%")
print(f"F1 Score: {f1:.2f}%")


Accuracy of the classifier: 35.55%
Training Accuracy: 35.19%
Validation Accuracy: 35.55%
Precision: 31.01%
Recall: 35.55%
F1 Score: 27.12%


In [2]:

# Function to predict the most common trash item based on given latitude and longitude
def predict_trash_item(latitude, longitude, model):
    # Create a DataFrame with the input features
    input_features = pd.DataFrame([[latitude, longitude]], columns=['Latitude', 'Longitude'])
    
    # Use the model to predict the most common trash item
    prediction = model.predict(input_features)
    
    return prediction[0]

# Example coordinates
latitude_value = 38.187609  
longitude_value = -121.974758  

# Predict the trash item for the provided coordinates
predicted_trash_item = predict_trash_item(latitude_value, longitude_value, dt_classifier)
print(f"Predicted most common trash item: {predicted_trash_item}")

Predicted most common trash item: Cigarette Butts


# Model:Random Forest

In [3]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from sklearn.ensemble import RandomForestClassifier

# Load the dataset
file_path = 'cleaned_ocean.csv'
data = pd.read_csv(file_path)

# Columns to exclude that are not trash items
excluded_columns = ['Total Items Collected', 'Adults', 'Children', 'People', 'Pounds', 'Miles','Year','Day','Month','Cleanup ID','Latitude','Longitude','Group Name','Zone','State']

# Identify columns related to trash items collected based on data type and excluding the non-trash item columns
trash_item_columns = [col for col in data.columns if data[col].dtype in ['int64', 'float64'] and col not in excluded_columns]

# Determine the most common trash item for each cleanup event
data['Most_Common_Trash'] = data[trash_item_columns].idxmax(axis=1)

# Prepare the features and target
X = data[['Latitude', 'Longitude']]  # Features
y = data['Most_Common_Trash']  # Target

# Splitting the dataset into training and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Initialize the Decision Tree Classifier
dt_classifier = RandomForestClassifier(n_estimators=200, max_depth=10, min_samples_split=2, min_samples_leaf=1, random_state=42)
#dt_classifier = RandomForestClassifier(n_estimators=100, max_depth=5, min_samples_split=10, min_samples_leaf=5, random_state=42)




# Train the classifier
dt_classifier.fit(X_train, y_train)

# Predict on the test set
y_pred = dt_classifier.predict(X_test)

# Calculate the accuracy
accuracy = accuracy_score(y_test, y_pred)

# Precision, Recall, and F1 Score, expressed as a percentage
precision = precision_score(y_test, y_pred, average='weighted') * 100  # Multiplied by 100 to convert to percentage
recall = recall_score(y_test, y_pred, average='weighted') * 100
f1 = f1_score(y_test, y_pred, average='weighted') * 100

training_accuracy = accuracy_score(y_train, dt_classifier.predict(X_train))
validation_accuracy = accuracy_score(y_test, y_pred)

# Print out the results
print(f"Accuracy of the classifier: {accuracy:.2%}")
print(f"Training Accuracy: {training_accuracy:.2%}")
print(f"Validation Accuracy: {validation_accuracy:.2%}")
print(f"Precision: {precision:.2f}%")
print(f"Recall: {recall:.2f}%")
print(f"F1 Score: {f1:.2f}%")

Accuracy of the classifier: 41.21%
Training Accuracy: 45.58%
Validation Accuracy: 41.21%
Precision: 41.49%
Recall: 41.21%
F1 Score: 33.62%


In [4]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score
from sklearn.ensemble import RandomForestClassifier

# Load the dataset
file_path = 'cleaned_ocean.csv'
data = pd.read_csv(file_path)

# Columns to exclude that are not trash items
excluded_columns = ['Total Items Collected', 'Adults', 'Children', 'People', 'Pounds', 'Miles','Year','Day','Month','Cleanup ID','Latitude','Longitude','Group Name','Zone','State']

# Identify columns related to trash items collected based on data type and excluding the non-trash item columns
trash_item_columns = [col for col in data.columns if data[col].dtype in ['int64', 'float64'] and col not in excluded_columns]

# Determine the most common trash item for each cleanup event
data['Most_Common_Trash'] = data[trash_item_columns].idxmax(axis=1)

# Prepare the features and target
X = data[['Latitude', 'Longitude']]  # Features
y = data['Most_Common_Trash']  # Target

# Splitting the dataset into training and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Initialize the Decision Tree Classifier
dt_classifier = RandomForestClassifier(n_estimators=200, max_depth=10, min_samples_split=2, min_samples_leaf=1, random_state=42)
#dt_classifier = RandomForestClassifier(n_estimators=100, max_depth=5, min_samples_split=10, min_samples_leaf=5, random_state=42)


#dt_classifier = LinearSVC(C=1.0, random_state=42, dual=False)

# Train the classifier
dt_classifier.fit(X_train, y_train)

# Predict on the test set
y_pred = dt_classifier.predict(X_test)

# Function to predict the most common trash item based on given latitude and longitude
def predict_trash_item(latitude, longitude, model):
    # Create a DataFrame with the input features
    input_features = pd.DataFrame([[latitude, longitude]], columns=['Latitude', 'Longitude'])
    
    # Use the model to predict the most common trash item
    prediction = model.predict(input_features)
    
    return prediction[0]

# Example coordinates
latitude_value = 38.187609  
longitude_value = -121.974758 

# Predict the trash item for the provided coordinates
predicted_trash_item = predict_trash_item(latitude_value, longitude_value, dt_classifier)

# Print out the results

print(f"Predicted most common trash item: {predicted_trash_item}")


Predicted most common trash item: Cigarette Butts


# Finding best parameters 

In [5]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import GridSearchCV, train_test_split




# Use a subset of the data for faster experimentation
X_train_sub, _, y_train_sub, _ = train_test_split(X_train, y_train, test_size=0.2, random_state=42)

# Define a smaller grid of hyperparameters
param_grid = {
    'n_estimators': [50, 100],
    'max_depth': [None, 10],
    'min_samples_split': [2, 5],
    'min_samples_leaf': [1, 2]
}

# Enable parallelization with n_jobs
grid_search = GridSearchCV(RandomForestClassifier(random_state=42), param_grid, cv=5, n_jobs=-1)
grid_search.fit(X_train_sub, y_train_sub)

best_params = grid_search.best_params_
print(f"Best Hyperparameters: {best_params}")
## Traing and testing the model with best parameters


Best Hyperparameters: {'max_depth': None, 'min_samples_leaf': 2, 'min_samples_split': 5, 'n_estimators': 100}


In [6]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score
from sklearn.ensemble import RandomForestClassifier

# Load the dataset
file_path = 'cleaned_ocean.csv'
data = pd.read_csv(file_path)

# Columns to exclude that are not trash items
excluded_columns = ['Total Items Collected', 'Adults', 'Children', 'People', 'Pounds', 'Miles','Year','Day','Month','Cleanup ID','Latitude','Longitude','Group Name','Zone','State']

# Identify columns related to trash items collected based on data type and excluding the non-trash item columns
trash_item_columns = [col for col in data.columns if data[col].dtype in ['int64', 'float64'] and col not in excluded_columns]

# Determine the most common trash item for each cleanup event
data['Most_Common_Trash'] = data[trash_item_columns].idxmax(axis=1)

# Prepare the features and target
X = data[['Latitude', 'Longitude']]  # Features
y = data['Most_Common_Trash']  # Target

# Splitting the dataset into training and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Initialize the Decision Tree Classifier
dt_classifier = RandomForestClassifier(n_estimators=100, max_depth=None, min_samples_split=5, min_samples_leaf=2, random_state=42)
#dt_classifier = RandomForestClassifier(n_estimators=100, max_depth=5, min_samples_split=10, min_samples_leaf=5, random_state=42)


#dt_classifier = LinearSVC(C=1.0, random_state=42, dual=False)

# Train the classifier
dt_classifier.fit(X_train, y_train)

# Predict on the test set
y_pred = dt_classifier.predict(X_test)

# Function to predict the most common trash item based on given latitude and longitude
def predict_trash_item(latitude, longitude, model):
    # Create a DataFrame with the input features
    input_features = pd.DataFrame([[latitude, longitude]], columns=['Latitude', 'Longitude'])
    
    # Use the model to predict the most common trash item
    prediction = model.predict(input_features)
    
    return prediction[0]

# Example coordinates
latitude_value = 38.187609  
longitude_value = -121.974758  

# Predict the trash item for the provided coordinates
predicted_trash_item = predict_trash_item(latitude_value, longitude_value, dt_classifier)

# Print out the results
accuracy = accuracy_score(y_test, y_pred)

training_accuracy = accuracy_score(y_train, dt_classifier.predict(X_train))
validation_accuracy = accuracy_score(y_test, y_pred)

print(f"Predicted most common trash item: {predicted_trash_item}")
print(f"Accuracy of the classifier: {accuracy:.2%}")
print(f"Training Accuracy: {training_accuracy:.2%}")
print(f"Validation Accuracy: {validation_accuracy:.2%}")


Predicted most common trash item: Grocery Bags (Plastic)
Accuracy of the classifier: 42.63%
Training Accuracy: 77.30%
Validation Accuracy: 42.63%


# Finding best accuracy at each epoch through early stopping

In [7]:
# Initialize the RandomForestClassifier with early stopping
rf_classifier = RandomForestClassifier(n_estimators=100, max_depth=None, min_samples_split=5, min_samples_leaf=1, random_state=42)

best_accuracy = 0.0
best_round = 0
early_stopping_rounds = 10

for epoch in range(100):  
    rf_classifier.fit(X_train, y_train)

    # Predict on the test set
    y_pred = rf_classifier.predict(X_test)

    # Calculate the accuracy
    accuracy = accuracy_score(y_test, y_pred)

    # Print the accuracy for each epoch
    print(f"Epoch {epoch + 1}, Accuracy: {accuracy:.2%}")

    # Check for early stopping
    if accuracy > best_accuracy:
        best_accuracy = accuracy
        best_round = epoch + 1
    elif epoch + 1 - best_round >= early_stopping_rounds:
        print(f"Early stopping at epoch {epoch + 1} with the best accuracy of {best_accuracy:.2%}")
        break

Epoch 1, Accuracy: 42.59%
Epoch 2, Accuracy: 42.59%
Epoch 3, Accuracy: 42.59%
Epoch 4, Accuracy: 42.59%
Epoch 5, Accuracy: 42.59%
Epoch 6, Accuracy: 42.59%
Epoch 7, Accuracy: 42.59%
Epoch 8, Accuracy: 42.59%
Epoch 9, Accuracy: 42.59%
Epoch 10, Accuracy: 42.59%
Epoch 11, Accuracy: 42.59%
Early stopping at epoch 11 with the best accuracy of 42.59%


In [8]:
# Function to predict the most common trash item based on given latitude and longitude
def predict_trash_item(latitude, longitude, model):
    # Create a DataFrame with the input features
    input_features = pd.DataFrame([[latitude, longitude]], columns=['Latitude', 'Longitude'])
    
    # Use the model to predict the most common trash item
    prediction = model.predict(input_features)
    
    return prediction[0]

# Example coordinates
latitude_value = 38.187609  
longitude_value = -121.974758  

# Predict the trash item for the provided coordinates
predicted_trash_item = predict_trash_item(latitude_value, longitude_value, dt_classifier)

# Print out the results
print(f"Predicted most common trash item: {predicted_trash_item}")


Predicted most common trash item: Grocery Bags (Plastic)


# Model 3:Logistic Regression

In [9]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
import warnings
warnings.filterwarnings('ignore')

# Load the dataset
file_path = 'cleaned_ocean.csv'
data = pd.read_csv(file_path)

# Columns to exclude that are not trash items
excluded_columns = ['Total Items Collected', 'Adults', 'Children', 'People', 'Pounds', 'Miles', 'Year', 'Day', 'Month', 'Cleanup ID', 'Latitude', 'Longitude', 'Group Name', 'Zone', 'State']

# Identify columns related to trash items collected based on data type and excluding the non-trash item columns
trash_item_columns = [col for col in data.columns if data[col].dtype in ['int64', 'float64'] and col not in excluded_columns]

# Determine the most common trash item for each cleanup event
data['Most_Common_Trash'] = data[trash_item_columns].idxmax(axis=1)

# Prepare the features and target
X = data[['Latitude', 'Longitude']]  # Features
y = data['Most_Common_Trash']  # Target

# Splitting the dataset into training and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Initialize the Logistic Regression Classifier
logistic_classifier = LogisticRegression(random_state=42)

# Train the classifier
logistic_classifier.fit(X_train, y_train)

# Predict on the test set
y_pred = logistic_classifier.predict(X_test)

# Calculate the accuracy
accuracy = accuracy_score(y_test, y_pred)
# Precision, Recall, and F1 Score, expressed as a percentage
precision = precision_score(y_test, y_pred, average='weighted') * 100  # Multiplied by 100 to convert to percentage
recall = recall_score(y_test, y_pred, average='weighted') * 100
f1 = f1_score(y_test, y_pred, average='weighted') * 100

# Print out the results
print(f"Accuracy of the classifier: {accuracy:.2%}")
training_accuracy = accuracy_score(y_train, logistic_classifier.predict(X_train))
validation_accuracy = accuracy_score(y_test, y_pred)
print(f"Training Accuracy: {training_accuracy:.2%}")
print(f"Validation Accuracy: {validation_accuracy:.2%}")

print(f"Precision: {precision:.2f}%")
print(f"Recall: {recall:.2f}%")
print(f"F1 Score: {f1:.2f}%")

Accuracy of the classifier: 31.02%
Training Accuracy: 30.36%
Validation Accuracy: 31.02%
Precision: 16.16%
Recall: 31.02%
F1 Score: 20.95%


In [10]:
# Function to predict the most common trash item based on given latitude and longitude using Logistic Regression
def predict_trash_item(latitude, longitude, model):
    # Create a DataFrame with the input features
    input_features = pd.DataFrame([[latitude, longitude]], columns=['Latitude', 'Longitude'])
    
    # Use the model to predict the most common trash item
    prediction = model.predict(input_features)
    
    return prediction[0]

# Example coordinates
latitude_value = 38.187609  # Replace with actual latitude
longitude_value = -121.974758  # Replace with actual longitude

# Predict the trash item for the provided coordinates using Logistic Regression model
predicted_trash_item = predict_trash_item(latitude_value, longitude_value, logistic_classifier)
print(f"Predicted most common trash item: {predicted_trash_item}")


Predicted most common trash item: Plastic Pieces


# Model 4 :KNN

In [11]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
import warnings
warnings.filterwarnings('ignore')

# Load the dataset
file_path = 'cleaned_ocean.csv'
data = pd.read_csv(file_path)

# Columns to exclude that are not trash items
excluded_columns = ['Total Items Collected', 'Adults', 'Children', 'People', 'Pounds', 'Miles', 'Year', 'Day', 'Month', 'Cleanup ID', 'Latitude', 'Longitude', 'Group Name', 'Zone', 'State']

# Identify columns related to trash items collected based on data type and excluding the non-trash item columns
trash_item_columns = [col for col in data.columns if data[col].dtype in ['int64', 'float64'] and col not in excluded_columns]

# Determine the most common trash item for each cleanup event
data['Most_Common_Trash'] = data[trash_item_columns].idxmax(axis=1)

# Prepare the features and target
X = data[['Latitude', 'Longitude']] 
y = data['Most_Common_Trash']  

# Splitting the dataset into training and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Initialize the KNN Classifier
knn_classifier = KNeighborsClassifier(n_neighbors=5)  # You can adjust the number of neighbors (k) as needed

# Train the classifier
knn_classifier.fit(X_train, y_train)

# Predict on the test set
y_pred = knn_classifier.predict(X_test)

# Calculate the accuracy
accuracy = accuracy_score(y_test, y_pred)
# Precision, Recall, and F1 Score, expressed as a percentage
precision = precision_score(y_test, y_pred, average='weighted') * 100  # Multiplied by 100 to convert to percentage
recall = recall_score(y_test, y_pred, average='weighted') * 100
f1 = f1_score(y_test, y_pred, average='weighted') * 100
# Print out the results
print(f"Accuracy of the classifier: {accuracy:.2%}")
training_accuracy = accuracy_score(y_train, knn_classifier.predict(X_train))
validation_accuracy = accuracy_score(y_test, y_pred)
print(f"Training Accuracy: {training_accuracy:.2%}")
print(f"Validation Accuracy: {validation_accuracy:.2%}")

print(f"Precision: {precision:.2f}%")
print(f"Recall: {recall:.2f}%")
print(f"F1 Score: {f1:.2f}%")

Accuracy of the classifier: 39.92%
Training Accuracy: 55.09%
Validation Accuracy: 39.92%
Precision: 37.99%
Recall: 39.92%
F1 Score: 38.32%


In [12]:
# Function to predict the most common trash item based on given latitude and longitude using KNN
def predict_trash_item_knn(latitude, longitude, model):
    # Create a DataFrame with the input features
    input_features = pd.DataFrame([[latitude, longitude]], columns=['Latitude', 'Longitude'])
    
    # Use the model to predict the most common trash item
    prediction = model.predict(input_features)
    
    return prediction[0]

# Example coordinates
latitude_value = 38.187609  # Replace with actual latitude
longitude_value = -121.974758  # Replace with actual longitude

# Predict the trash item for the provided coordinates using KNN model
predicted_trash_item_knn = predict_trash_item_knn(latitude_value, longitude_value, knn_classifier)
print(f"Predicted most common trash item using KNN: {predicted_trash_item_knn}")


Predicted most common trash item using KNN: Grocery Bags (Plastic)


# Model 5 :SVM

In [13]:
import pandas as pd
from sklearn.decomposition import PCA
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import precision_score, recall_score, f1_score

# Load the dataset
file_path = 'cleaned_ocean.csv'
data = pd.read_csv(file_path)

# Columns to exclude that are not trash items
excluded_columns = [
    'Total Items Collected', 'Adults', 'Children', 'People', 'Pounds', 'Miles', 
    'Year', 'Day', 'Month', 'Cleanup ID', 'Latitude', 'Longitude', 'Group Name', 
    'Zone', 'State'
]

# Identify columns related to trash items collected based on data type and excluding the non-trash item columns
trash_item_columns = [col for col in data.columns if data[col].dtype in ['int64', 'float64'] and col not in excluded_columns]

# Determine the most common trash item for each cleanup event
data['Most_Common_Trash'] = data[trash_item_columns].idxmax(axis=1)

# Prepare the features and target
X = data[['Latitude', 'Longitude']]  # Features
y = data['Most_Common_Trash']  # Target

# Standardize the features
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# Reduce the data using PCA
pca = PCA(n_components=2)  # Since we only have 2 features, we'll keep them both
X_pca = pca.fit_transform(X_scaled)

# Splitting the dataset into training and test sets
X_train, X_test, y_train, y_test = train_test_split(X_pca, y, test_size=0.2, random_state=42)

# Initialize the SVM Classifier with optimized parameters
svm_classifier = SVC(kernel='linear', C=1.0, random_state=42, cache_size=7000)  # Increased cache size

# Train the classifier
svm_classifier.fit(X_train, y_train)

# Predict on the test set
y_pred = svm_classifier.predict(X_test)

# Calculate the accuracy
accuracy = accuracy_score(y_test, y_pred)

# Print out the results
accuracy, training_accuracy, validation_accuracy = (
    accuracy_score(y_test, y_pred),
    accuracy_score(y_train, svm_classifier.predict(X_train)),
    accuracy_score(y_test, y_pred)
)

accuracy, training_accuracy, validation_accuracy


# Calculate Precision, Recall, and F1 Score
precision = precision_score(y_test, y_pred, average='weighted') * 100
recall = recall_score(y_test, y_pred, average='weighted') * 100
f1 = f1_score(y_test, y_pred, average='weighted') * 100

# Print out the results with accuracy, precision, recall, and F1 score
print(f"Accuracy of the classifier: {accuracy*100:.2f}%")
print(f"Training Accuracy: {training_accuracy*100:.2f}%")
print(f"Validation Accuracy: {validation_accuracy*100:.2f}%")
print(f"Precision: {precision:.2f}%")
print(f"Recall: {recall:.2f}%")
print(f"F1 Score: {f1:.2f}%")

Accuracy of the classifier: 29.60%
Training Accuracy: 29.01%
Validation Accuracy: 29.60%
Precision: 15.42%
Recall: 29.60%
F1 Score: 19.83%


In [14]:
# Function to predict the most common trash item based on given latitude and longitude using SVM
def predict_trash_item_svm(latitude, longitude, model):
    # Create a DataFrame with the input features
    input_features = pd.DataFrame([[latitude, longitude]], columns=['Latitude', 'Longitude'])
    
    # Use the model to predict the most common trash item
    prediction = model.predict(input_features)
    
    return prediction[0]

# Example coordinates
latitude_value = 38.187609  
longitude_value = -121.974758  

# Predict the trash item for the provided coordinates using SVM model
predicted_trash_item_svm = predict_trash_item_svm(latitude_value, longitude_value, svm_classifier)
print(f"Predicted most common trash item using SVM: {predicted_trash_item_svm}")


Predicted most common trash item using SVM: Cigarette Butts
