In [27]:
import pandas as pd
from sklearn import preprocessing

# Read the raw CSV, labelling its four columns explicitly via `names`.
File_data = pd.read_csv(
    "./ce889_dataCollection.csv",
    names=["X1", "X2", "Y1", "Y2"],
)

# Quick look at the raw data: first rows, dimensions and per-column extremes.
print(File_data.head())
print(File_data.shape)
print(File_data.max())
print(File_data.min())

# Keep only the first row for each distinct (X1, X2) pair.
File_data = File_data.drop_duplicates(subset=["X1", "X2"])

# Report how many values are missing in each column.
missing_per_column = File_data.isna().sum()
print(missing_per_column)

# Replace any missing value with the mean of its column.
column_means = File_data.mean()
File_data = File_data.fillna(column_means)

def normalize_dataframe(data_frame):
    """Return a min-max scaled copy of *data_frame*.

    Each column is rescaled independently with
    ``(value - column_min) / (column_max - column_min)``, so a column's
    smallest value becomes 0 and its largest becomes 1. The input frame is
    not modified.

    A column whose values are all equal has a zero range; dividing by it
    would turn the whole column into NaN. Such a column is mapped to 0.0
    instead (entries that were already missing stay NaN).

    Args:
        data_frame: DataFrame whose columns support ``min``, ``max`` and
            arithmetic (i.e. numeric columns).

    Returns:
        A new DataFrame with the same index and columns, holding the
        scaled values.
    """
    normalized_data = data_frame.copy()
    for feature in data_frame.columns:
        column = data_frame[feature]
        val_min = column.min()
        value_range = column.max() - val_min
        if value_range == 0:
            # Constant column: avoid 0/0 -> NaN. Subtracting the minimum
            # yields 0 for every present value and keeps NaN where the
            # original entry was missing.
            normalized_data[feature] = (column - val_min).astype(float)
        else:
            normalized_data[feature] = (column - val_min) / value_range
    return normalized_data

# Rescale every column of the cleaned data with normalize_dataframe.
normalized_Data = normalize_dataframe(File_data)
print(normalized_Data)

# Positional 90/10 split: the leading 90% of rows (in their current order,
# no shuffling) become the training set, the remainder the testing set.
train_len = int(len(normalized_Data) * 0.9)
train = normalized_Data.iloc[:train_len, :]
test = normalized_Data.iloc[train_len:, :]

# Show both partitions, training first.
for _partition in (train, test):
    print(_partition)

# Persist both partitions as CSV files without the index column.
for _partition, _csv_path in ((train, "training.csv"), (test, "testing.csv")):
    _partition.to_csv(_csv_path, index=False)


           X1     X2   Y1    Y2
0 -634.874661  399.9  0.2  0.00
1 -634.874661  399.7  0.3  0.04
2 -634.914661  399.4  0.4  0.00
3 -634.914661  399.0  0.5  0.04
4 -634.954661  398.5  0.6  0.00
(31121, 4)
X1    663.946132
X2    553.470250
Y1      7.691698
Y2      7.060572
dtype: float64
X1   -658.498082
X2     65.529118
Y1     -2.809573
Y2     -5.626429
dtype: float64
X1    0
X2    0
Y1    0
Y2    0
dtype: int64
             X1        X2        Y1        Y2
0      0.017863  0.685269  0.286591  0.443480
1      0.017863  0.684859  0.296114  0.446633
2      0.017833  0.684244  0.305637  0.443480
3      0.017833  0.683424  0.315159  0.446633
4      0.017803  0.682400  0.324682  0.443480
...         ...       ...       ...       ...
31116  0.503373  0.019698  0.475915  0.249439
31117  0.505235  0.015213  0.478924  0.237779
31118  0.507208  0.010664  0.482561  0.225939
31119  0.509295  0.006036  0.486834  0.213938
31120  0.511497  0.001317  0.491749  0.201795

[31063 rows x 4 columns]
        