In [3]:
import pandas as pd
import numpy as np
import os
from scipy import stats
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

In [4]:
# Specify the folder path
folder_path = r'C:\Users\yil\Desktop\ipy project\week6-week8 lab project\FBG_down\down_data'

# Iterate through all files in the folder
for filename in os.listdir(folder_path):
    if filename.endswith('.csv'):
        # Construct the full file path
        file_path = os.path.join(folder_path, filename)
        
        # Read the CSV file
        df = pd.read_csv(file_path)
        
        # Check if the first column exists
        if df.shape[1] == 0:
            print(f"File {filename} has no columns.")
            continue
        
        # Split the data in the first column by semicolon
        try:
            split_data = df.iloc[:, 0].str.split(';', expand=True)
        except AttributeError:
            print(f"Cannot split the first column of file {filename}; it may not be of string type.")
            continue

        # Print the shape of the split data to check the number of columns
        print(f"File {filename} split data shape: {split_data.shape}")

        # Assume the data has 16 columns; set column names according to your needs
        if split_data.shape[1] != 16:
            print(f"Number of columns in the split data of file {filename} does not match the expected: {split_data.shape[1]} columns")
            continue

        split_data.columns = ['receive_ts', 'packet_ts', 'seqnr', 'sync status', 'TEC status', 
                              'missed frames', 'channel', 'numsens', 'wavelength 1', 
                              'wavelength 2', 'extra1', 'extra2', 'extra3', 'extra4', 
                              'extra5', 'extra6']

        # Merge the split data back into the original DataFrame
        df = pd.concat([df, split_data], axis=1)

        # If needed, drop the original column
        df.drop(columns=[df.columns[0]], inplace=True)
        
        # Save the processed data back to the CSV file
        df.to_csv(file_path, index=False)
        print(f"Processed file saved: {filename}\n")



File 2.1_75degrees.csv split data shape: (30059, 1)
Number of columns in the split data of file 2.1_75degrees.csv does not match the expected: 1 columns
File 2.2_65degrees.csv split data shape: (27392, 1)
Number of columns in the split data of file 2.2_65degrees.csv does not match the expected: 1 columns


  df = pd.read_csv(file_path)
  df = pd.read_csv(file_path)
  df = pd.read_csv(file_path)


File 2.3_55degrees.csv split data shape: (36140, 1)
Number of columns in the split data of file 2.3_55degrees.csv does not match the expected: 1 columns
File 2.4_45degrees.csv split data shape: (35582, 1)
Number of columns in the split data of file 2.4_45degrees.csv does not match the expected: 1 columns
File 2.5_35degrees.csv split data shape: (37442, 1)
Number of columns in the split data of file 2.5_35degrees.csv does not match the expected: 1 columns
File 2.6_25degrees.csv split data shape: (25969, 1)
Number of columns in the split data of file 2.6_25degrees.csv does not match the expected: 1 columns
File 2.7_15degrees.csv split data shape: (36467, 1)
Number of columns in the split data of file 2.7_15degrees.csv does not match the expected: 1 columns


  df = pd.read_csv(file_path)


In [7]:
import os
import pandas as pd
from scipy import stats

# Specify the folder path
folder_path = r'C:\Users\yil\Desktop\ipy project\week6-week8 lab project\FBG_down\down_data'
output_folder = r'C:\Users\yil\Desktop\ipy project\week6-week8 lab project\FBG_down'
cleaned_folder = os.path.join(output_folder, 'cleaned')

# Create a new folder if it does not exist
os.makedirs(cleaned_folder, exist_ok=True)

# Iterate through all files in the folder
for filename in os.listdir(folder_path):
    if filename.endswith('.csv'):
        # Construct the full file path
        file_path = os.path.join(folder_path, filename)
        
        # Read the CSV file
        df = pd.read_csv(file_path)

        # Ensure 'wavelength 1' and 'wavelength 2' columns are numeric
        df['wavelength 1'] = pd.to_numeric(df['wavelength 1'], errors='coerce')
        df['wavelength 2'] = pd.to_numeric(df['wavelength 2'], errors='coerce')

        # Remove outliers based on Z-score
        z_scores = stats.zscore(df[['wavelength 1', 'wavelength 2']], nan_policy='omit')  # Ignore missing values
        abs_z_scores = abs(z_scores)
        mask_z = (abs_z_scores <= 3).all(axis=1)  # Boolean indexing for all columns in each row
        
        # Keep rows where Z-scores are between -3 and 3
        df = df[mask_z]

        # Remove outliers based on IQR
        for col in ['wavelength 1', 'wavelength 2']:
            Q1 = df[col].quantile(0.25)
            Q3 = df[col].quantile(0.75)
            IQR = Q3 - Q1
            lower_bound = Q1 - 1.5 * IQR
            upper_bound = Q3 + 1.5 * IQR
            df = df[(df[col] >= lower_bound) & (df[col] <= upper_bound)]  # Keep rows within the range
        
        # Calculate and print means
        mean_wavelength1 = df['wavelength 1'].mean()
        mean_wavelength2 = df['wavelength 2'].mean()

        print(f"Mean after removing outliers (wavelength 1): {mean_wavelength1}")
        print(f"Mean after removing outliers (wavelength 2): {mean_wavelength2}")

        # Save the processed DataFrame to a new CSV file
        cleaned_file_path = os.path.join(cleaned_folder, filename)
        df.to_csv(cleaned_file_path, index=False)

        print(f"Processed file saved: {cleaned_file_path}")


Mean after removing outliers (wavelength 1): 1545.8721623299991
Mean after removing outliers (wavelength 2): 1550.5980616026554
Processed file saved: C:\Users\yil\Desktop\ipy project\week6-week8 lab project\FBG_down\cleaned\2.1_75degrees.csv
Mean after removing outliers (wavelength 1): 1545.7349679818788
Mean after removing outliers (wavelength 2): 1550.4711508512107
Processed file saved: C:\Users\yil\Desktop\ipy project\week6-week8 lab project\FBG_down\cleaned\2.2_65degrees.csv
Mean after removing outliers (wavelength 1): 1545.5927132948389
Mean after removing outliers (wavelength 2): 1550.3474506768287
Processed file saved: C:\Users\yil\Desktop\ipy project\week6-week8 lab project\FBG_down\cleaned\2.3_55degrees.csv


  df = pd.read_csv(file_path)
  df = pd.read_csv(file_path)
  df = pd.read_csv(file_path)


Mean after removing outliers (wavelength 1): 1545.445264582203
Mean after removing outliers (wavelength 2): 1550.2241397997318
Processed file saved: C:\Users\yil\Desktop\ipy project\week6-week8 lab project\FBG_down\cleaned\2.4_45degrees.csv
Mean after removing outliers (wavelength 1): 1545.3702574626143
Mean after removing outliers (wavelength 2): 1550.095001404262
Processed file saved: C:\Users\yil\Desktop\ipy project\week6-week8 lab project\FBG_down\cleaned\2.5_35degrees.csv
Mean after removing outliers (wavelength 1): 1545.1334568608263
Mean after removing outliers (wavelength 2): 1549.9271363374944
Processed file saved: C:\Users\yil\Desktop\ipy project\week6-week8 lab project\FBG_down\cleaned\2.6_25degrees.csv
Mean after removing outliers (wavelength 1): 1545.0160090120564
Mean after removing outliers (wavelength 2): 1549.780907626955
Processed file saved: C:\Users\yil\Desktop\ipy project\week6-week8 lab project\FBG_down\cleaned\2.7_15degrees.csv


  df = pd.read_csv(file_path)


In [9]:
# Specify the folder path for cleaned data
cleaned_folder = r'C:\Users\yil\Desktop\ipy project\week6-week8 lab project\FBG_down\cleaned'
output_folder = r'C:\Users\yil\Desktop\ipy project\week6-week8 lab project\FBG_down'
output_file = os.path.join(output_folder, 'down_data_mean.csv')

# Create the output folder if it does not exist
os.makedirs(output_folder, exist_ok=True)

# Create an empty list to store results
results = []

# Iterate through all cleaned CSV files in the folder
for filename in os.listdir(cleaned_folder):
    print(f"Processing file: {filename}")  # Print the current file name
    if filename.endswith('.csv'):
        # Construct the full file path
        file_path = os.path.join(cleaned_folder, filename)
        
        # Read the cleaned CSV file
        df = pd.read_csv(file_path)

        # Calculate the means
        mean_wavelength1 = df['wavelength 1'].mean()
        mean_wavelength2 = df['wavelength 2'].mean()

        # Extract the numerical part from the file name
        base_name = os.path.splitext(filename)[0]  # Remove the extension
        print(f"Base name: {base_name}")  # Print the base file name
        
        last_part = base_name.split('_')[-1]  # Get the last part
        print(f"Last part: {last_part}")  # Print the extracted last part
        
        # Convert to integer
        extracted_number = int(last_part) if last_part.isdigit() else None
        print(f"Extracted number: {extracted_number}")  # Print the extracted number

        # Append the result to the list
        results.append({
            'Temperature': extracted_number,
            'mean_wavelength_1': mean_wavelength1,
            'mean_wavelength_2': mean_wavelength2
        })

# Convert the results list to a DataFrame
results_df = pd.DataFrame(results)

# Save the results to the specified path as a CSV file
results_df.to_csv(output_file, index=False)

print(f"Mean data has been saved to: {output_file}")


Processing file: 2.1_75degrees.csv
Base name: 2.1_75degrees
Last part: 75degrees
Extracted number: None
Processing file: 2.2_65degrees.csv
Base name: 2.2_65degrees
Last part: 65degrees
Extracted number: None
Processing file: 2.3_55degrees.csv
Base name: 2.3_55degrees
Last part: 55degrees
Extracted number: None
Processing file: 2.4_45degrees.csv
Base name: 2.4_45degrees
Last part: 45degrees
Extracted number: None
Processing file: 2.5_35degrees.csv
Base name: 2.5_35degrees
Last part: 35degrees
Extracted number: None
Processing file: 2.6_25degrees.csv
Base name: 2.6_25degrees
Last part: 25degrees
Extracted number: None
Processing file: 2.7_15degrees.csv
Base name: 2.7_15degrees
Last part: 15degrees
Extracted number: None
Mean data has been saved to: C:\Users\yil\Desktop\ipy project\week6-week8 lab project\FBG_down\down_data_mean.csv
