In [5]:
import pandas as pd
import numpy as np

# Roll number from which both transformation coefficients are derived.
roll_number = 102313042

# alpha scales with (roll_number mod 7) in steps of 0.05;
# beta scales with ((roll_number mod 5) + 1) in steps of 0.3.
alpha = (roll_number % 7) * 0.05
beta = (1 + roll_number % 5) * 0.3

# Echo the inputs so the printed results can be traced back to them.
print(
    f"Roll Number Used : {roll_number}",
    f"alpha value     : {alpha}",
    f"beta value      : {beta}",
    sep="\n",
)

dataset_location = "/kaggle/input/india-air-quality-data/data.csv"


def locate_column(columns, target="no2"):
    """Return the first entry of `columns` equal to `target`, ignoring case.

    Parameters
    ----------
    columns : iterable of str
        Candidate column names, checked in order.
    target : str
        Name to look for; compared case-insensitively.

    Returns
    -------
    str or None
        The matching name exactly as it appears in `columns`, or None when
        nothing matches.
    """
    wanted = target.lower()
    return next((name for name in columns if name.lower() == wanted), None)


def transform_no2(x, alpha, beta):
    """Return z = x + alpha * sin(beta * x), evaluated element-wise on `x`."""
    return x + alpha * np.sin(beta * x)


def fit_gaussian_parameters(z):
    """Estimate the summary parameters reported by this cell from `z`.

    Parameters
    ----------
    z : pandas.Series
        Numeric values to summarise.

    Returns
    -------
    tuple
        (mean, variance, std, lambda, c) where variance/std are pandas'
        defaults for `Series.var()` / `Series.std()`,
        lambda = 1 / (2 * variance) and c = 1 / (std * sqrt(2 * pi)),
        i.e. the coefficients of a normal density written as
        c * exp(-lambda * (z - mean)**2).

    Raises
    ------
    ValueError
        If the variance is NaN, infinite or not strictly positive (empty
        input, a single value, or constant data). Without this check the
        formulas below would silently yield nan/inf.
    """
    mean_value = z.mean()
    variance_value = z.var()
    std_deviation = z.std()

    if not np.isfinite(variance_value) or variance_value <= 0:
        raise ValueError(
            f"cannot estimate parameters: variance of z is {variance_value} "
            "(need at least two distinct valid values)"
        )

    lambda_estimate = 1 / (2 * variance_value)
    normalization_const = 1 / (std_deviation * np.sqrt(2 * np.pi))
    return (
        mean_value,
        variance_value,
        std_deviation,
        lambda_estimate,
        normalization_const,
    )


try:
    # latin1 + on_bad_lines="skip": read the file leniently, dropping rows
    # the parser cannot handle instead of aborting.
    air_quality = pd.read_csv(
        dataset_location,
        encoding="latin1",
        low_memory=False,
        on_bad_lines="skip"
    )
    # Header names may carry stray whitespace; normalise before matching.
    air_quality.columns = air_quality.columns.str.strip()

    no2_column = locate_column(air_quality.columns, "no2")

    if no2_column is None:
        print("\nNO2 column not available in the dataset.")
        print("Columns found:", air_quality.columns.tolist())

    else:
        print(f"\nSelected column: {no2_column}")

        # Non-numeric entries become NaN and are dropped below. The coerced
        # column is written back to `air_quality`, as the original cell did.
        air_quality[no2_column] = pd.to_numeric(
            air_quality[no2_column],
            errors="coerce"
        )
        processed_data = (
            air_quality[[no2_column]]
            .dropna()
            .rename(columns={no2_column: "x"})
        )
        print(f"Number of valid records: {len(processed_data)}")

        processed_data["z"] = transform_no2(processed_data["x"], alpha, beta)

        print("\nTransformed data (sample):")
        print(processed_data.head())

        z_series = processed_data["z"]

        # Raises ValueError on degenerate data; reported by the generic
        # handler at the bottom of this cell.
        (
            mean_value,
            variance_value,
            std_deviation,
            lambda_estimate,
            normalization_const,
        ) = fit_gaussian_parameters(z_series)

        print("\n" + "-" * 40)
        print("Step 4: Estimated Statistical Parameters")
        print("-" * 40)
        print(f"Mean (μ)      : {mean_value}")
        print(f"Lambda (λ)    : {lambda_estimate}")
        print(f"Constant (c)  : {normalization_const}")

except FileNotFoundError:
    print("Dataset file not found. Verify the path.")
except Exception as error:
    print("Execution failed due to:", error)


Roll Number Used : 102313042
alpha value     : 0.30000000000000004
beta value      : 0.8999999999999999

Selected column: no2
Number of valid records: 419509

Transformed data (sample):
      x          z
0  17.4  17.414383
1   7.0   7.005044
2  28.5  28.648350
3  14.7  14.884794
4   7.5   7.635013

----------------------------------------
Step 4: Estimated Statistical Parameters
----------------------------------------
Mean (μ)      : 25.80132545300301
Lambda (λ)    : 0.0014587855482436996
Constant (c)  : 0.021548685849211567
