In [None]:
import pandas as pd
import numpy as np 
import matplotlib.pyplot as plt
import seaborn as sns

In [None]:
# Load the raw export. The explicit `sep`/`decimal` arguments tell pandas the
# file is semicolon-delimited and writes decimals with a comma.
# NOTE(review): the public UCI Air Quality dataset marks missing readings with
# -200 rather than blanks. If this is that file, consider `na_values=-200`
# here so the mean imputation below is not skewed -- confirm against the data.
df = pd.read_csv('AirQuality.csv', sep=';', decimal=',', encoding='latin1')
print(df.head())

# Missing values per column before imputation.
print(df.isnull().sum())

# Fill gaps in numeric columns with that column's mean. The result is
# re-assigned instead of using `inplace=True`, which returns None (the original
# cell printed that None).
df = df.fillna(df.mean(numeric_only=True))

# Missing values per column after imputation (non-numeric columns are untouched).
print(df.isnull().sum())

# Rows without a Date/Time cannot be aligned, and pandas matches NaN join keys
# to each other, so they are removed before the merge.
keyed = df.dropna(subset=['Date', 'Time'])

sub_1 = keyed[['Date', 'Time', 'CO(GT)']]
sub_2 = keyed[['Date', 'Time', 'PT08.S1(CO)', 'NO2(GT)']]

# Join on the full timestamp. Joining on 'Date' alone pairs every row of a day
# with every other row of that day (many-to-many), so CO readings end up next
# to sensor values from unrelated rows. `validate` makes pandas raise a
# MergeError if the (Date, Time) key is not unique on both sides.
data_integrate = pd.merge(
    sub_1,
    sub_2,
    on=['Date', 'Time'],
    how='inner',
    validate='one_to_one',
)

# dropna() returns a new frame, so the result has to be kept.
data_integrate = data_integrate.dropna().reset_index(drop=True)
print(data_integrate.head())

data_integrate.info()

# Transposed view of the first rows (columns become rows).
t_data = data_integrate.head().transpose()

In [None]:
# Grouped bar chart of the first ten merged rows, one bar per plotted column
fig, ax = plt.subplots(figsize=(10, 6))

columns_to_plot = ['CO(GT)', 'PT08.S1(CO)', 'NO2(GT)']  # Adjust columns as needed

# Only the leading ten rows are drawn
preview = data_integrate.iloc[:10]

x = np.arange(len(preview['Date']))  # left-most bar position of each group
width = 0.25  # width of each bar

# Each series is shifted right by its position in `columns_to_plot` times the
# bar width; a column missing from the frame is skipped but keeps its slot.
for slot, col_name in enumerate(columns_to_plot):
    if col_name not in data_integrate.columns:
        continue
    ax.bar(x + slot * width, preview[col_name], width=width, label=col_name)

ax.set_xlabel('Date')
ax.set_ylabel('Values')
ax.set_title('Air Quality Parameters Over Time (Bar Graph)')

# Ticks sit one bar width to the right of each group's first bar
ax.set_xticks(x + width)
ax.set_xticklabels(preview['Date'], rotation=45)

# Anchor the legend outside the axes, to the right
ax.legend(loc='upper right', bbox_to_anchor=(1.2, 1))
fig.tight_layout()
plt.show()

In [None]:
# Model Building Section
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.model_selection import train_test_split

# Features are the two other readings; the target is CO(GT)
X = data_integrate[['PT08.S1(CO)', 'NO2(GT)']]
y = data_integrate['CO(GT)']

# Hold out 20% of the rows for evaluation; the fixed seed makes the split repeatable
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Fit the linear model on the training portion (fit() returns the estimator)
model = LinearRegression().fit(X_train, y_train)

# Predict on the held-out rows
y_pred = model.predict(X_test)

# Score the predictions against the held-out targets
mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)

print(f'Mean Squared Error: {mse:.2f}')
print(f'R² Score: {r2:.2f}')

# Scatter of predicted against actual values
fig, ax = plt.subplots(figsize=(8, 6))
ax.scatter(y_test, y_pred, alpha=0.7)
ax.set_xlabel('Actual CO(GT)')
ax.set_ylabel('Predicted CO(GT)')
ax.set_title('Actual vs Predicted CO(GT)')

# Dashed red reference line y = x across the range of the actual values
diag = [y_test.min(), y_test.max()]
ax.plot(diag, diag, 'r--')

fig.tight_layout()
plt.show()