#clustering based

#1st

In [None]:
import folium    #interactive map for the data visulization
import pandas as pd
from sklearn.cluster import KMeans

# Read the sample dataset from CSV (path is relative to the working directory).
data = pd.read_csv('waste_data.csv')

# Keep only the latitude/longitude columns; these are the clustering inputs.
coordinates = data.loc[:, ['Latitude', 'Longitude']]

In [None]:
# Number of clusters requested from K-Means (adjust to your needs).
# The map cell looks marker colours up by cluster label, so keep this no
# larger than the number of entries in cluster_colors if every cluster
# should get its own colour.
num_clusters = 5

In [None]:
# Build the K-Means model; random_state is fixed so repeated runs give the
# same assignment.
kmeans = KMeans(n_clusters=num_clusters, random_state=0)

# Fit on the coordinates and store each row's cluster label on the frame.
cluster_labels = kmeans.fit_predict(coordinates)
data['Cluster'] = cluster_labels



In [None]:
# Centre the folium map on the mean latitude/longitude of the dataset.
center_lat = data['Latitude'].mean()
center_lon = data['Longitude'].mean()
map_center = [center_lat, center_lon]
m = folium.Map(location=map_center, zoom_start=13)

In [None]:
# Marker colour palette for the clusters.
# The marker loop looks colours up by cluster label, so keep at least
# num_clusters entries here to give every cluster a distinct colour.
cluster_colors = ['red', 'blue', 'green', 'purple', 'orange']

In [None]:
# Add one circle marker per waste collection point, coloured by its cluster.
for cluster in range(num_clusters):
    # Wrap around the palette so that raising num_clusters above the number
    # of defined colours reuses colours instead of raising IndexError.
    marker_color = cluster_colors[cluster % len(cluster_colors)]
    cluster_data = data[data['Cluster'] == cluster]
    for _, row in cluster_data.iterrows():
        folium.CircleMarker(
            location=[row['Latitude'], row['Longitude']],
            radius=5,
            color=marker_color,
            fill=True,
            fill_color=marker_color,
            fill_opacity=0.7,
            # Popup text shown when the marker is clicked.
            popup=f'Cluster: {cluster}, Type: {row["WasteType"]}, Volume: {row["WasteVolume"]}'
        ).add_to(m)

# Display the map directly in a Jupyter Notebook (last expression of the cell).
m

In [None]:
pip install scikit-learn pandas


#2nd

In [None]:
import pandas as pd
import matplotlib.pyplot as plt

In [None]:
# Load the dataset from 'waste_air_quality_measurement.csv' (path is relative
# to the working directory); this rebinds `data` to the new DataFrame.
data = pd.read_csv('waste_air_quality_measurement.csv')

In [None]:
# Convert the 'DateTime' column to pandas datetime values, replacing the
# original column on the DataFrame.
data['DateTime'] = pd.to_datetime(data['DateTime'])

In [None]:
# Preview the first rows of the loaded dataset.
print(data.head())

In [None]:
# Basic data summary
# NOTE(review): this prints the first rows via head(), the same output as the
# preceding preview cell. If summary statistics were intended here,
# data.describe() may be what is wanted — confirm.
print("Dataset Summary:")
print(data.head())

In [None]:
print("\nDataset Information:")
# DataFrame.info() writes its report to stdout itself and returns None, so it
# is called directly; wrapping it in print() would append a stray "None" line.
data.info()

In [None]:
# Summarise the waste management columns with descriptive statistics.
# NOTE(review): describe() with default arguments reports only numeric columns
# when the selection mixes numeric and non-numeric dtypes; if 'WasteType' is
# text it will be missing from the output — confirm against the data.
waste_columns = ['WasteType', 'CollectionFrequency', 'WasteVolume']
print("\nWaste Management Summary:")
waste_summary = data[waste_columns].describe()
print(waste_summary)


In [None]:
# Summarise the air quality columns with descriptive statistics.
air_quality_columns = ['AQI', 'PM2.5', 'CO', 'NO2', 'SO2', 'O3']
print("\nAir Quality Summary:")
air_quality_summary = data[air_quality_columns].describe()
print(air_quality_summary)



In [None]:
# Visualize air quality over time.
# Once 'DateTime' has been moved into the index it is no longer a column, so
# the conversion and set_index are guarded: re-running this cell then simply
# re-plots instead of raising KeyError.
if 'DateTime' in data.columns:
    data['DateTime'] = pd.to_datetime(data['DateTime'])
    data.set_index('DateTime', inplace=True)

# Plot the selected columns as lines against the DateTime index.
data[['AQI', 'PM2.5', 'CO', 'NO2', 'SO2', 'O3']].plot(title='Air Quality Over Time')
plt.xlabel('DateTime')
plt.ylabel('Value')
plt.grid()
plt.legend(loc='upper left')
plt.show()

#3rd

In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score
import matplotlib.pyplot as plt

In [None]:
# Re-read the CSV into a fresh DataFrame, rebinding `data` for the regression
# steps that follow.
data = pd.read_csv('waste_air_quality_measurement.csv')

In [None]:
# Features: the 'Temperature' and 'Humidity' columns as a 2-D array.
X = data[['Temperature', 'Humidity']].values
# Target: the air quality index. The model trained on this data is a linear
# regression and its evaluation plot is labelled "Air Quality Index", so the
# target is the 'AQI' column rather than 'Site_Location', which is not the
# quantity being predicted.
y = data['AQI'].values

In [None]:
# Split the data into training and testing sets: 20% of the rows are held out
# for testing, and random_state is fixed so the split is reproducible.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [None]:
"""
If we have an issue regarding conversion from string to float or int:
import pandas as pd

# Create a sample DataFrame
data = pd.DataFrame({'DateTime': ['2023-09-01 8:00:00', '2023-09-01 09:00:00', '2023-09-01 10:00:00', '2023-09-01 11:00:00', '2023-09-01 12:00:00', '2023-09-01 13:00:00']})

# Convert the 'DateTime' column to datetime format
data['DateTime'] = pd.to_datetime(data['DateTime'])

# Convert the 'DateTime' column to float
data['FloatDateTime'] = data['DateTime'].apply(lambda x: float(x.timestamp()))

# Print the DataFrame
print(data)
"""


In [None]:
# Create a linear regression model and fit it on the training split.
# fit() is left as the last expression so the notebook displays the fitted
# estimator as the cell output.
model = LinearRegression()
model.fit(X_train, y_train)

In [None]:
# Make predictions on the test set with the fitted model.
y_pred = model.predict(X_test)



In [None]:
# Evaluate the model on the test set: mean squared error and the R-squared
# score of the predictions against the true targets.
mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)

In [None]:
# Report both evaluation metrics, one per line.
print(f"Mean Squared Error: {mse}", f"R-squared: {r2}", sep="\n")

In [None]:
# Scatter the model's predictions against the true test-set targets.
fig, ax = plt.subplots()
ax.scatter(y_test, y_pred)
ax.set_xlabel("Actual Air Quality Index")
ax.set_ylabel("Predicted Air Quality Index")
ax.set_title("Actual vs. Predicted Air Quality Index")
plt.show()


In [None]:
import matplotlib.pyplot as plt


# Sample air quality data (replace with your own data): one concentration
# value per pollutant, in matching order.
pollutants = ['PM2.5', 'PM10', 'NO2', 'CO', 'SO2', 'O3']
concentration = [20, 25, 12, 2, 5, 30]

# Draw the bar chart on an explicit figure/axes pair; adjust figsize as needed.
fig, ax = plt.subplots(figsize=(10, 6))
ax.bar(pollutants, concentration, color='skyblue')

# Axis labels and title.
ax.set_xlabel('Pollutants')
ax.set_ylabel('Concentration (µg/m³ or ppm)')
ax.set_title('Air Quality by Pollutant')

# Rotate the x-axis labels for readability and add dashed horizontal guides.
plt.xticks(rotation=45)
ax.grid(axis='y', linestyle='--', alpha=0.7)

# Fit everything inside the figure, then display it.
fig.tight_layout()
plt.show()
