In [22]:
# Importing the packages:
import os
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from datetime import datetime, timedelta
from sklearn.linear_model import LinearRegression
from sklearn.metrics import r2_score

In [36]:
# === Parameters ===
# NOTE(review): both paths are absolute and machine-specific; adjust them (or derive
# them from a configurable base directory) before running on another computer.
file_path = r"C:\Users\gura1\OneDrive - post.bgu.ac.il\M.A\Year 1\2nd Semester 2025\Envirotech  do it yourself sensors for environmental research\LOG1.TXT"
save_dir = r"C:\Users\gura1\OneDrive - post.bgu.ac.il\M.A\Year 1\2nd Semester 2025\Envirotech  do it yourself sensors for environmental research\graphs"
# Reference time to which each row's Time_ms offset is added to build its timestamp.
start_datetime = datetime(2025, 5, 14, 16, 16, 0)

# Make sure the output folder exists before anything is written into it.
os.makedirs(save_dir, exist_ok=True)

# === Load & Clean Data ===
# The column names are supplied here rather than read from a header row in the file.
columns = ['Time_ms', 'SHTC3_Temp', 'SHTC3_Humidity', 'SCD4x_Temp', 'SCD4x_Humidity', 'SCD4x_CO2']
df = pd.read_csv(file_path, names=columns)

# Convert every column to numeric (unparseable cells become NaN), then drop any row
# that contains a NaN.
df = df.apply(pd.to_numeric, errors='coerce')
df = df.dropna()

# Remove rows in which any sensor reading is exactly zero; such values are treated
# as outliers rather than real measurements.
sensor_columns = ['SHTC3_Temp', 'SHTC3_Humidity', 'SCD4x_Temp', 'SCD4x_Humidity', 'SCD4x_CO2']
# .copy() makes the filtered frame independent of the unfiltered one, so adding the
# Timestamp column below cannot raise SettingWithCopyWarning on pandas versions
# without copy-on-write.
df = df[(df[sensor_columns] != 0).all(axis=1)].copy()

# Add timestamp column: vectorised "start time + elapsed milliseconds" instead of a
# Python-level lambda evaluated once per row.
df['Timestamp'] = pd.Timestamp(start_datetime) + pd.to_timedelta(df['Time_ms'], unit='ms')

# === Save Cleaned Data ===
output_excel = os.path.join(save_dir, "cleaned_data.xlsx")
output_csv = os.path.join(save_dir, "cleaned_data.csv")

df.to_excel(output_excel, index=False)
df.to_csv(output_csv, index=False)
print("✅ Cleaned data saved to Excel and CSV")

# === Plotting Functions ===
def plot_combined(y1, y2, y1_label, y2_label, title, ylabel, filename, x=None):
    """Plot two series against a shared time axis and save the figure to ``save_dir``.

    Parameters
    ----------
    y1, y2 : array-like
        The two series to draw as lines on the same axes.
    y1_label, y2_label : str
        Legend entries for ``y1`` and ``y2`` respectively.
    title : str
        Figure title.
    ylabel : str
        Label for the y axis.
    filename : str
        File name (joined onto the module-level ``save_dir``) the figure is saved as.
    x : array-like, optional
        Values for the x axis. When omitted, the module-level ``df['Timestamp']``
        is used, which is what this function always used before ``x`` existed.

    Returns
    -------
    None
        The figure is written to disk and then closed.
    """
    if x is None:
        # Backward-compatible default: fall back to the notebook's global frame.
        x = df['Timestamp']

    fig, ax = plt.subplots(figsize=(12, 6))
    try:
        sns.lineplot(x=x, y=y1, label=y1_label, ax=ax)
        sns.lineplot(x=x, y=y2, label=y2_label, ax=ax)
        ax.set_title(title)
        ax.set_xlabel("Time")
        ax.set_ylabel(ylabel)
        ax.tick_params(axis='x', labelrotation=45)
        ax.legend()
        # Lay out only after every artist (including the legend) exists.
        fig.tight_layout()
        fig.savefig(os.path.join(save_dir, filename))
    finally:
        # Always release the figure, even if plotting or saving raised.
        plt.close(fig)

def plot_single(x, y, title, ylabel, filename):
    """Draw one line series against ``x`` and save it under ``save_dir``.

    Parameters
    ----------
    x : array-like
        Values for the x axis (the axis is labelled "Time").
    y : array-like
        Values for the y axis.
    title : str
        Figure title.
    ylabel : str
        Label for the y axis.
    filename : str
        File name, joined onto the module-level ``save_dir``.
    """
    target_path = os.path.join(save_dir, filename)

    fig, ax = plt.subplots(figsize=(12, 6))
    sns.lineplot(x=x, y=y, ax=ax)

    ax.set_title(title)
    ax.set_xlabel("Time")
    ax.set_ylabel(ylabel)
    # Tilt the existing x tick labels so long time stamps do not overlap.
    plt.setp(ax.get_xticklabels(), rotation=45)

    fig.tight_layout()
    fig.savefig(target_path)
    plt.close(fig)

def correlation_plot(x, y, xlabel, ylabel, title, filename):
    """Scatter ``y`` against ``x`` with a regression line, report R², and save.

    Parameters
    ----------
    x, y : pandas.Series
        Paired observations; ``x`` is the single predictor, ``y`` the response.
    xlabel, ylabel : str
        Axis labels.
    title : str
        Figure title; the R² value is appended on a second line.
    filename : str
        File name, joined onto the module-level ``save_dir``.

    The R² value is also printed to standard output.
    """
    fig, ax = plt.subplots(figsize=(8, 6))
    sns.regplot(x=x, y=y, line_kws={"color": "red"}, ax=ax)

    # scikit-learn expects a 2-D feature matrix, so turn x into a single column.
    predictor = x.values.reshape(-1, 1)
    fitted = LinearRegression().fit(predictor, y.values)
    r2 = r2_score(y, fitted.predict(predictor))

    ax.set_title(f"{title}\nR² = {r2:.3f}")
    ax.set_xlabel(xlabel)
    ax.set_ylabel(ylabel)

    fig.tight_layout()
    fig.savefig(os.path.join(save_dir, filename))
    plt.close(fig)

    print(f'{title} -> R² = {r2:.3f}')


✅ Cleaned data saved to Excel and CSV


In [37]:
# === Temperature Plot ===
# Overlay the temperature traces of both sensors on one time axis.
plot_combined(
    y1=df['SHTC3_Temp'],
    y2=df['SCD4x_Temp'],
    y1_label='SHTC3 Temp',
    y2_label='SCD4x Temp',
    title='Temperature Over Time (SHTC3 vs SCD4x)',
    ylabel='Temperature (°C)',
    filename='temperature_comparison.png',
)

In [38]:
# === Humidity Plot ===
# Overlay the relative-humidity traces of both sensors on one time axis.
plot_combined(
    y1=df['SHTC3_Humidity'],
    y2=df['SCD4x_Humidity'],
    y1_label='SHTC3 Humidity',
    y2_label='SCD4x Humidity',
    title='Humidity Over Time (SHTC3 vs SCD4x)',
    ylabel='Relative Humidity (%)',
    filename='humidity_comparison.png',
)

In [39]:
# === CO2 Plot ===
# Only the SCD4x reports CO2, so this is a single-series plot.
plot_single(
    x=df['Timestamp'],
    y=df['SCD4x_CO2'],
    title='CO2 Concentration Over Time (SCD4x)',
    ylabel='CO2 (ppm)',
    filename='co2_concentration.png',
)

In [40]:
# === Correlation Plots ===
# Compare the two sensors reading-for-reading: first temperature, then humidity.
correlation_plot(
    x=df['SHTC3_Temp'],
    y=df['SCD4x_Temp'],
    xlabel='SHTC3 Temperature (°C)',
    ylabel='SCD4x Temperature (°C)',
    title='Correlation: Temperature SHTC3 vs SCD4x',
    filename='temp_corr.png',
)

correlation_plot(
    x=df['SHTC3_Humidity'],
    y=df['SCD4x_Humidity'],
    xlabel='SHTC3 Humidity (%)',
    ylabel='SCD4x Humidity (%)',
    title='Correlation: Humidity SHTC3 vs SCD4x',
    filename='humidity_corr.png',
)

print("✅ All graphs and Excel file saved to:", save_dir)

Correlation: Temperature SHTC3 vs SCD4x -> R² = 0.974
Correlation: Humidity SHTC3 vs SCD4x -> R² = 0.995
✅ All graphs and Excel file saved to: C:\Users\gura1\OneDrive - post.bgu.ac.il\M.A\Year 1\2nd Semester 2025\Envirotech  do it yourself sensors for environmental research\graphs
