In [4]:
import pandas as pd
import matplotlib.pyplot as plt

In [None]:
#@title Load the Dataset
# Read the blasting CSV straight from GitHub. The path embeds a 40-character
# hex ref, which looks like a pinned commit (so the file should not change
# between runs) -- confirm if that matters for reproducibility.
url = "https://raw.githubusercontent.com/FuTSA23/time-series-analysis-datasets/da059e9f430e1337f7b437609ad2488480465882/Complete_Blasting_info%20(1).csv"
df = pd.read_csv(filepath_or_buffer=url)

# Show the first five rows as the cell output.
df.head(n=5)

In [None]:
#@title Question 1:How can you plot the time-series?
# Convert the 'Time' column to datetime values before it is used as the x-axis.
df['Time'] = pd.to_datetime(df['Time'])

# Single (very wide) figure holding the PM10 series against 'Time'.
fig, ax = plt.subplots(figsize=(50, 20))
ax.plot(df['Time'], df['PM10 (µg/m3)'], label='PM10')
ax.set_xlabel('Time')
ax.set_ylabel('PM10')
ax.set_title('Time-Series of PM10 Data')
ax.legend()
ax.grid(True)
plt.show()

In [None]:
#@title Question 1:Removing NA Values

import pandas as pd
import matplotlib.pyplot as plt

# Re-read the CSV so this cell starts from an unmodified frame.
url = "https://raw.githubusercontent.com/FuTSA23/time-series-analysis-datasets/da059e9f430e1337f7b437609ad2488480465882/Complete_Blasting_info%20(1).csv"
df = pd.read_csv(url)

# Parse 'Unnamed: 0' as datetimes and promote it to the index, so the rest of
# the notebook can work with a time-indexed frame.
df['Unnamed: 0'] = pd.to_datetime(df['Unnamed: 0'])
df = df.set_index('Unnamed: 0')

# Gap-filled copy of the frame: linear interpolation, filling forward only.
df_interpolated = df.interpolate(method='linear', limit_direction='forward')

# Overlay the raw and the gap-filled PM10 series on one axes.
fig, ax = plt.subplots(figsize=(50, 20))
for frame, marker, label in (
    (df, 'o', 'Original PM10'),
    (df_interpolated, 'x', 'Interpolated PM10'),
):
    ax.plot(
        frame.index,
        frame['PM10 (µg/m3)'],
        marker=marker,
        markersize=3,
        linestyle='-',
        label=label,
    )
ax.set_xlabel('Time')
ax.set_ylabel('PM10 (µg/m3)')
ax.set_title('Time-Series of PM10 Data with Interpolation')
ax.legend()
ax.grid(True)
plt.show()


In [None]:
#@title Question 1: Multi-variate Time Series

# Two stacked panels sharing one figure: PM10 on top, PM2.5 below.
fig, axes = plt.subplots(2, 1, figsize=(12, 6))

# (column, marker, legend label, y-axis label, panel title) for each panel.
panels = (
    ('PM10 (µg/m3)', 'o', 'PM10', 'PM10 (µg/m3)', 'Time-Series of PM10 Data'),
    ('PM2.5 (µg/m3)', 'x', 'PM2.5', 'PM2.5 (µg/m3)', 'Time-Series of PM2.5 Data'),
)

for ax, (column, marker, label, ylabel, title) in zip(axes, panels):
    ax.plot(df['Date'], df[column], marker=marker, markersize=3, linestyle='-', label=label)
    ax.set_xlabel('Date')
    ax.set_ylabel(ylabel)
    ax.set_title(title)
    ax.legend()
    ax.grid(True)

# Keep titles and axis labels of the two panels from overlapping.
plt.tight_layout()
plt.show()


In [None]:
#@title Question 1: Individual time-series for different air polluting factors

# PM10, PM2.5 and NO2 drawn on one shared axes, each with its own marker.
fig, ax = plt.subplots(figsize=(12, 6))

# (column, marker, legend label) per pollutant, in drawing order.
pollutant_series = (
    ('PM10 (µg/m3)', 'o', 'PM10'),
    ('PM2.5 (µg/m3)', 'x', 'PM2.5'),
    ('NO2 (µg/m3)', 's', 'NO2'),
)
for column, marker, label in pollutant_series:
    ax.plot(df['Date'], df[column], marker=marker, markersize=3, linestyle='-', label=label)

ax.set_xlabel('Date')
ax.set_ylabel('Concentration (µg/m3)')
ax.set_title('Time-Series of Air Pollutants')
ax.legend()
ax.grid(True)
plt.show()


In [None]:
#@title Question 1:ARMA /ARIMA PROCESSES

from statsmodels.tsa.arima.model import ARIMA

# Fill gaps in PM10 by linear interpolation into a NEW series. The shared `df`
# is deliberately not modified here, so re-running this cell (or running it
# before other cells) does not change what those cells see as "raw" data.
pm10_data = df['PM10 (µg/m3)'].interpolate(method='linear')

# ARIMA(1,1,1) is only a starting point; adjust the orders based on the
# ACF and PACF plots of the series.
model = ARIMA(pm10_data, order=(1, 1, 1))
results = model.fit()

# The forecast horizon is one step per calendar day in this window.
forecast_start_date = pd.to_datetime('2023-02-01')
forecast_end_date = pd.to_datetime('2023-05-01')
forecast_index = pd.date_range(start=forecast_start_date, end=forecast_end_date, freq='D')
forecast_days = len(forecast_index)

# Out-of-sample forecast for the `forecast_days` steps that follow the last
# observation. This is the same range the previous
# predict(start=len(pm10_data), end=len(pm10_data) + forecast_days - 1) call
# requested, but without the `typ='levels'` keyword: `typ` comes from the
# legacy arima_model API and is not a documented argument of this results class.
forecast = results.forecast(steps=forecast_days)

# NOTE(review): the forecast values are plotted against `forecast_index`, which
# assumes the observations are daily and end the day before
# `forecast_start_date`. If the data is sampled differently, the forecast
# steps do not actually fall on these dates -- confirm against the dataset.
plt.figure(figsize=(12, 6))
plt.plot(pm10_data.index, pm10_data, marker='o', markersize=3, linestyle='-', label='Original PM10')
plt.plot(forecast_index, forecast, marker='x', markersize=3, linestyle='-', label='ARIMA Forecasted PM10')
plt.xlabel('Date')
plt.ylabel('PM10 (µg/m3)')
plt.title('ARIMA Model Forecast for PM10 Data')
plt.legend()
plt.grid(True)
# Zoom the x-axis onto the forecast window only.
plt.xlim(forecast_start_date, forecast_end_date)
plt.show()


In [None]:
#@title Question 1: Linear Interpolation

# Keep the column as it currently is in `df` and build the interpolated
# version as a separate series. Interpolating `df` in place and then plotting
# the same column twice would draw two identical curves, so the comparison
# would show nothing.
pm10_original = df['PM10 (µg/m3)']
pm10_linear = pm10_original.interpolate(method='linear')

# NOTE(review): if an earlier cell has already interpolated `df` in place,
# `pm10_original` no longer contains gaps and both curves will coincide.

# Plot the original and interpolated time-series for 'PM10 (µg/m3)' column
plt.figure(figsize=(12, 6))
plt.plot(pm10_original.index, pm10_original, marker='o', markersize=3, linestyle='-', label='Original PM10')
plt.plot(pm10_linear.index, pm10_linear, marker='x', markersize=3, linestyle='-', label='Interpolated PM10')
plt.xlabel('Date')
plt.ylabel('PM10 (µg/m3)')
plt.title('Linear Interpolation for PM10 Data')
plt.legend()
plt.grid(True)

plt.show()

In [None]:
#@title Question 1: Cubic spline Interpolation

import numpy as np  # used below; no earlier cell in this notebook imports it
from scipy.interpolate import CubicSpline, interp1d

# Fill missing values first (CubicSpline rejects non-finite y-values). This is
# done in place because later cells, such as the curve-fitting cell, read the
# gap-filled `df`.
df.interpolate(method='linear', inplace=True)

# Fit the spline against row positions 0..n-1 rather than timestamps.
x = np.arange(len(df))
y = df['PM10 (µg/m3)'].values
cs = CubicSpline(x, y)

# Evaluate the spline at 100 evenly spaced (fractional) row positions.
xs = np.linspace(0, len(df) - 1, 100)
ys = cs(xs)

# Map the fractional row positions back onto the real time axis by
# interpolating between the timestamps of the neighbouring rows. Adding
# `xs` days to the first timestamp would only be correct for daily sampling.
time_index = pd.to_datetime(df.index)
elapsed_seconds = (time_index - time_index[0]).total_seconds()
spline_times = time_index[0] + pd.to_timedelta(np.interp(xs, x, elapsed_seconds), unit='s')

# Plot the original and cubic spline interpolated time-series for 'PM10 (µg/m3)' column
plt.plot(df.index, df['PM10 (µg/m3)'], 'o', label='data')
plt.plot(df.index, df['PM10 (µg/m3)'], label='Original PM10')
plt.plot(spline_times, ys, label="Cubic Spline Interpolation", color='green')
plt.xlabel('Date')
plt.ylabel('PM10 (µg/m3)')
plt.title('Cubic Spline Interpolation for PM10 Data')
plt.legend()
plt.grid(True)
plt.show()


In [None]:
#@title Question 2: Blasting Time

# Rows whose time of day lies between 13:45 and 14:45.
window = df.between_time('13:45', '14:45')

# 'Blasting Effect' is the plain (unweighted) sum of the PM10 and NO2 columns.
# `assign` returns a new frame, so `df` itself is left untouched.
blasting_data = window.assign(
    **{'Blasting Effect': window['PM10 (µg/m3)'] + window['NO2 (µg/m3)']}
)

# Combined series over the selected window.
fig, ax = plt.subplots(figsize=(12, 6))
ax.plot(blasting_data.index, blasting_data['Blasting Effect'], marker='.', linestyle='-', color='orange')
ax.set_xlabel('Time')
ax.set_ylabel('Blasting Effect')
ax.set_title('Time-Series of Blasting Effect')
ax.grid(True)
plt.show()


In [None]:
#@title Question 2: Histogram of Blast Trigger Times

# Minute-of-hour component of every timestamp in the index of `blasting_data`.
blasting_data['Minute'] = blasting_data.index.minute

# Distribution of those minute values, split into 60 bins.
fig, ax = plt.subplots(figsize=(10, 6))
ax.hist(blasting_data['Minute'], bins=60, alpha=0.7, color='blue')
ax.set_xlabel('Minute of the Hour')
ax.set_ylabel('Frequency')
ax.set_title('Histogram of Blast Trigger Times (in Minutes)')
ax.set_xticks(range(0, 60, 5))  # one tick every 5 minutes keeps the axis readable
ax.grid(True)
plt.show()



In [None]:
#@title Question 2: Histogram of the detected blasting times across all months of data.
# Hour and minute components of each timestamp in the index.
blasting_data['Hour'] = blasting_data.index.hour
blasting_data['Minute'] = blasting_data.index.minute

# Time of day as a fractional hour, e.g. 14:30 becomes 14.5.
blasting_data['Blasting Time'] = blasting_data['Hour'].add(blasting_data['Minute'].div(60))

# Distribution of the fractional-hour values, split into 24 bins.
fig, ax = plt.subplots(figsize=(10, 6))
ax.hist(blasting_data['Blasting Time'], bins=24, alpha=0.7, color='green')
ax.set_xlabel('Blasting Time (hours)')
ax.set_ylabel('Frequency')
ax.set_title('Histogram of Detected Blasting Times')
ax.grid(True)
plt.show()


In [None]:
#@title Question 2: Probability of Blast Happening during 14:15 to 14:30

import scipy.stats as stats

# Fractional-hour time of day for every row of `blasting_data`.
blast_times = blasting_data['Blasting Time']

# Histogram of the fractional-hour values (24 bins).
fig_hist, ax_hist = plt.subplots(figsize=(10, 6))
ax_hist.hist(blast_times, bins=24, alpha=0.7, color='green')
ax_hist.set_xlabel('Blasting Time (hours)')
ax_hist.set_ylabel('Frequency')
ax_hist.set_title('Histogram of Detected Blasting Times')
ax_hist.grid(True)
plt.show()

# QQ plot of the same values against a normal distribution.
plt.figure(figsize=(8, 8))
stats.probplot(blast_times, dist='norm', plot=plt)
plt.xlabel('Theoretical Quantiles')
plt.ylabel('Sample Quantiles')
plt.title('QQ Plot of Blasting Times')
plt.grid(True)
plt.show()

# Share of rows in `blasting_data` whose time of day lies in [14.25, 14.5],
# i.e. 14:15 to 14:30 with both ends included.
in_window = blast_times.between(14.25, 14.5)
blasting_time_14_15_to_14_30 = blasting_data[in_window]
probability_blast_14_15_to_14_30 = len(blasting_time_14_15_to_14_30) / len(blasting_data)

print(f"Probability of Blast Happening during 14:15 to 14:30: {probability_blast_14_15_to_14_30:.2%}")


In [None]:
#@title Question 3: Classification

# Columns this notebook treats as "stock" series and as "flow" series.
stock_columns = ['PM10 (µg/m3)', 'NO (µg/m3)', 'PM2.5 (µg/m3)', 'NO2 (µg/m3)']
flow_columns = ['NOX (ppb)', 'CO (mg/m3)', 'SO2 (µg/m3)', 'NH3 (µg/m3)', 'Ozone (µg/m3)', 'Benzene (µg/m3)']

# One figure per group: (columns, y-axis label, title), stock first, then flow.
groups = (
    (stock_columns, 'Concentration (µg/m3)', 'Stock Time Series Data: Air Pollution Parameters'),
    (flow_columns, 'Concentration (ppb, mg/m3, or µg/m3)', 'Flow Time Series Data: Air Pollution Parameters'),
)

for columns, ylabel, title in groups:
    fig, ax = plt.subplots(figsize=(12, 6))
    df[columns].plot(marker='o', linestyle='-', ax=ax)
    ax.set_xlabel('Date and Time')
    ax.set_ylabel(ylabel)
    ax.set_title(title)
    ax.grid(True)
    plt.show()


In [None]:
#@title Question 3: Curve fitting
import numpy as np  # needed for np.arange below; no earlier cell imports it
from scipy.optimize import curve_fit
from scipy.interpolate import UnivariateSpline


# Series that the curves below are fitted to.
pm10_data = df['PM10 (µg/m3)']

# Row positions 0..n-1, used as the x-axis for the fits instead of timestamps.
time_array = np.arange(len(pm10_data))

# Parametric model used for the curve fit: a second-degree polynomial.
def polynomial_fit(x, a, b, c):
    """Evaluate the quadratic ``a * x**2 + b * x + c``.

    Args:
        x: Point(s) at which to evaluate; a scalar or a NumPy array.
        a: Coefficient of the squared term.
        b: Coefficient of the linear term.
        c: Constant term.

    Returns:
        The value of the quadratic at ``x`` (an array when ``x`` is an array).
    """
    quadratic_term = a * x**2
    linear_term = b * x
    return quadratic_term + linear_term + c

# Flat array of the PM10 values, shared by both fits and by the scatter plot.
pm10_values = pm10_data.values.ravel()

# Parametric fit: least-squares coefficients (a, b, c) of the quadratic model.
popt, _ = curve_fit(polynomial_fit, time_array, pm10_values)

# Non-parametric fit: univariate spline with smoothing factor s=100.
spline_fit = UnivariateSpline(time_array, pm10_values, s=100)

# Data points (markers only) with both fitted curves drawn over them.
fig, ax = plt.subplots(figsize=(12, 6))
ax.plot(time_array, pm10_values, marker='o', linestyle='', label='PM10 (µg/m3) Data')
ax.plot(time_array, polynomial_fit(time_array, *popt), label='Quadratic Polynomial Fit', color='orange')
ax.plot(time_array, spline_fit(time_array), label='Spline Interpolation', color='green')
ax.set_xlabel('Time')
ax.set_ylabel('PM10 (µg/m3)')
ax.set_title('PM10 (µg/m3) Data and Curve Fitting')
ax.legend()
ax.grid(True)
plt.show()

