In [None]:
# Import the libraries 
import pandas as pd
import numpy as np

# Cleaning the CSV file for a single coil

In [None]:
def coil(coil_number, stage, min_length=140, max_length=170, data_dir="SignalExport"):
    """Load and clean the width profile of one coil at one stage.

    Reads ``{data_dir}/{coil_number}B{stage}.csv``. The file is expected to hold
    one semicolon-separated record of the form::

        <coil id>;<stage id>;...;Lengthpoints:;<l1>;<l2>;...;Values;<w1>;<w2>;...;<last>

    Parameters
    ----------
    coil_number : str or int
        Coil identifier used in the file name.
    stage : str or int
        Stage digit used in the file name (the notebook uses 4 and 5 for the
        B4 and B5 files).
    min_length, max_length : float, default 140 and 170
        Inclusive bounds of the length window that is kept.
    data_dir : str, default "SignalExport"
        Folder that contains the export files.

    Returns
    -------
    pandas.DataFrame
        Columns ``coil_number``, ``coil_stage``, ``length``, ``width`` and
        ``length_m`` (``length`` truncated to an integer, used later for
        grouping). Rows whose length or width is 0 and rows outside the
        length window are removed. The result is an independent copy.

    Raises
    ------
    FileNotFoundError
        If the export file does not exist.
    ValueError
        If the ``Lengthpoints:`` or ``Values`` marker is missing, or a
        measurement cannot be converted to ``float``.
    """
    path = f"{data_dir}/{coil_number}B{stage}.csv"
    # pandas exposes the first line of the file as column names; the first
    # column name is taken as the record and split into its fields.
    fields = list(pd.read_csv(path).columns)[0].split(";")

    # The markers delimit the two measurement runs inside the record.
    length_start = fields.index("Lengthpoints:") + 1
    values_marker = fields.index("Values")

    coil_num, coil_stage = fields[0], fields[1]
    length = [float(value) for value in fields[length_start:values_marker]]
    # The final field is skipped, exactly as before (presumably the empty
    # string left by a trailing ";" - TODO confirm against a real export).
    width = [float(value) for value in fields[values_marker + 1:-1]]

    profile = pd.DataFrame({"length": pd.Series(length, dtype=float)})
    # Assigning a Series aligns on the index: surplus widths are dropped and
    # missing ones become NaN, which matches the previous behaviour.
    profile["width"] = pd.Series(width, dtype=float)
    profile.insert(0, "coil_stage", coil_stage)
    profile.insert(0, "coil_number", coil_num)

    # Drop rows holding a zero measurement and keep only the requested window.
    nonzero = (profile[["length", "width"]] != 0).all(axis=1)
    in_window = profile["length"].between(min_length, max_length)
    # .copy() makes the result independent of `profile`, so adding a column
    # below no longer triggers SettingWithCopyWarning.
    window = profile.loc[nonzero & in_window].copy()

    # Integer meter used later to group measurements.
    window["length_m"] = window["length"].astype(int)
    return window

# Creating a graph for a single coil

In [None]:
# Line graph of the width profile at stages B4 and B5 for one coil, over the
# length window between 140 and 170 that coil() keeps
# (TODO confirm the unit: other comments in this notebook say m, this one said mm).
# NOTE(review): `coil_number` is not assigned above this cell in the visible
# notebook; on a fresh kernel it must be set to a coil id before running this.
import matplotlib.pyplot as plt

B4_df = coil(coil_number, stage=4)
B5_df = coil(coil_number, stage=5)

# The size is passed when the figure is created: changing rcParams after
# plotting does not resize a figure that already exists.
fig, ax = plt.subplots(figsize=(8, 10))
ax.plot(B4_df["length"], B4_df["width"], "g", label="B4")
ax.plot(B5_df["length"], B5_df["width"], "r", label="B5")
ax.set_title(f"Coil number {coil_number}")
ax.set_xlabel("length")
ax.set_ylabel("width")
ax.legend()
plt.show()

# Function to calculate the difference in width and determine the biggest difference

In [None]:
# The number of measurements in B4 and B5 varies, so the widths are averaged per
# meter of length before the two stages are compared.
# The constriction is the most negative per-meter difference (B5 - B4).
def calculating_difference(b4_df=None, b5_df=None, coil_id=None):
    """Return the coil id and its constriction value.

    The constriction is the minimum, over all meters present in both frames,
    of ``mean B5 width - mean B4 width``.

    Parameters
    ----------
    b4_df, b5_df : pandas.DataFrame, optional
        Frames with ``length_m`` and ``width`` columns, as returned by
        ``coil``. When omitted, the notebook globals ``B4_df`` and ``B5_df``
        are used.
    coil_id : optional
        Identifier returned with the result. When omitted, the notebook
        global ``coil_number`` is used.

    Returns
    -------
    tuple
        ``(coil_id, constriction)``. ``constriction`` is NaN when the two
        frames share no meter.
    """
    # Fall back to the notebook globals so zero-argument calls keep working.
    if b4_df is None:
        b4_df = B4_df
    if b5_df is None:
        b5_df = B5_df
    if coil_id is None:
        coil_id = coil_number

    # Average width for every integer meter.
    average_B4 = b4_df.groupby("length_m")["width"].mean()
    average_B5 = b5_df.groupby("length_m")["width"].mean()

    # The subtraction aligns on the meter; a meter missing from either stage
    # yields NaN, which min() skips.
    difference = average_B5 - average_B4
    constriction = difference.min()
    return coil_id, constriction

# Reading the list of coils whose CSV files have no errors
### We use the csv file that was created in another notebook

In [None]:
# Coils to process. Per the original note, these are the coils whose files have
# no errors (no missing measurements and no files without data).
masud_df = pd.read_csv("coil_to_used.csv")
masud_list = list(masud_df["coilno"])
len(masud_list)

# Calculating the constriction for every file 

In [None]:
# Iterate over every coil in masud_list, compute its constriction and collect
# the resulting (coil_number, constriction) tuples in a list.
another_list = []
for coil_number in masud_list:
    # coil_number, B4_df and B5_df must keep exactly these notebook-level names:
    # calculating_difference() is called without arguments and reads them as globals.
    B4_df = coil(coil_number,stage=4)
    B5_df = coil(coil_number,stage=5)
    # Tuple returned by calculating_difference() for the current coil.
    differ = calculating_difference()
    another_list.append(differ)

In [None]:
# Check the list that we just created: preview the first entries only, so the
# output stays readable however many coils were processed.
another_list[:10]

In [None]:
# Turn the collected tuples into a two-column frame: the coil id and its
# constriction value (the minimum per-meter B5 - B4 width difference).
another_df = pd.DataFrame(data=another_list, columns=["coil", "difference"])
another_df.head()

In [None]:
# Add a 0/1 dummy column for the constriction, based on the 4.5mm threshold:
# 1 when the difference is below -4.5, otherwise 0.
another_df["constriction"] = np.where(another_df["difference"].lt(-4.5), 1, 0)
another_df.head(30)

In [None]:
# Check how many constrictions we have in the data at this point.
# count has to be called: without the parentheses the cell only displays the
# bound method instead of a number.
another_df.loc[another_df["constriction"] == 1, "constriction"].count()

# Merging the constriction data with the main data and saving the result as a CSV file

In [None]:
# Load the main file we are going to use for building the model.
# The path is relative, like the one coil() reads from, so the notebook does not
# depend on one user's home directory.
# NOTE(review): assumes the SignalExport folder next to the notebook is the same
# folder that previously lived under Downloads - confirm before running.
master_df = pd.read_csv("SignalExport/CoilData.csv")
master_df.shape

In [None]:
# Merge the constriction data into the master file on the shared "coil" column.
# pd.merge performs an inner join by default, so only coils present in both
# frames are kept.
final_df = pd.merge(master_df, another_df, on="coil")
final_df.to_csv("Final_4andhalf.csv")
# Preview last: a cell only displays its final expression, so the head() call
# that used to sit before to_csv() never showed anything.
final_df.head()