# Training Model ETL

### Extract CSV

In [15]:
import pandas as pd

In [29]:
df = pd.read_csv("readings.csv")

### Display the dataset

In [30]:
df

Unnamed: 0,id,batch_id,gravity,temperature,battery,angle,timestamp,brix
0,1,1,1.015740,30.1875,4.056309,31.13034,2025-10-06T16:08:45.513729,4.012791
1,2,1,1.014763,30.1875,4.056309,30.65194,2025-10-06T16:08:58.411916,3.766628
2,3,1,1.013954,30.1250,4.056309,30.26313,2025-10-06T16:09:10.999436,3.562475
3,4,1,1.013093,30.0625,4.056309,29.85785,2025-10-06T16:09:23.931160,3.344882
4,5,1,1.013142,30.0625,4.056309,29.88073,2025-10-06T16:09:39.360793,3.357274
...,...,...,...,...,...,...,...,...
943,944,1,0.999194,26.1875,3.889468,24.21739,2025-10-07T09:01:09.982037,-0.213538
944,945,1,0.999150,26.1875,3.889468,24.20172,2025-10-07T09:02:12.577656,-0.224941
945,946,1,0.999208,26.1875,3.889468,24.22255,2025-10-07T09:02:12.593633,-0.209909
946,947,1,0.999159,26.1875,3.889468,24.20502,2025-10-07T09:02:19.263649,-0.222609


In [18]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 948 entries, 0 to 947
Data columns (total 8 columns):
 #   Column       Non-Null Count  Dtype  
---  ------       --------------  -----  
 0   id           948 non-null    int64  
 1   batch_id     948 non-null    int64  
 2   gravity      948 non-null    float64
 3   temperature  948 non-null    float64
 4   battery      948 non-null    float64
 5   angle        948 non-null    float64
 6   timestamp    948 non-null    object 
 7   brix         948 non-null    float64
dtypes: float64(5), int64(2), object(1)
memory usage: 59.4+ KB


In [19]:
df.describe()

Unnamed: 0,id,batch_id,gravity,temperature,battery,angle,brix
count,948.0,948.0,948.0,948.0,948.0,948.0,948.0
mean,474.5,1.0,1.002666,28.109968,4.004149,25.56946,0.678422
std,273.808327,0.0,0.004715,1.028162,0.053262,1.857059,1.210512
min,1.0,1.0,0.998079,26.1875,3.863399,23.82536,-0.50279
25%,237.75,1.0,0.998767,27.5,3.983316,24.066267,-0.324242
50%,474.5,1.0,0.999198,28.625,4.035453,24.21887,-0.212371
75%,711.25,1.0,1.005876,28.9375,4.045881,26.737265,1.507973
max,948.0,1.0,1.01574,30.1875,4.056309,31.13034,4.012791


## Transform Data

### Remove the unnecessary columns

In [31]:
df = df.drop(columns=["id", "batch_id", "battery", "angle"])

### Remove the rows after 3 days

In [32]:
df

Unnamed: 0,gravity,temperature,timestamp,brix
0,1.015740,30.1875,2025-10-06T16:08:45.513729,4.012791
1,1.014763,30.1875,2025-10-06T16:08:58.411916,3.766628
2,1.013954,30.1250,2025-10-06T16:09:10.999436,3.562475
3,1.013093,30.0625,2025-10-06T16:09:23.931160,3.344882
4,1.013142,30.0625,2025-10-06T16:09:39.360793,3.357274
...,...,...,...,...
943,0.999194,26.1875,2025-10-07T09:01:09.982037,-0.213538
944,0.999150,26.1875,2025-10-07T09:02:12.577656,-0.224941
945,0.999208,26.1875,2025-10-07T09:02:12.593633,-0.209909
946,0.999159,26.1875,2025-10-07T09:02:19.263649,-0.222609


### Load

In [33]:
df.to_csv("transform_readings.csv", index=False)

### Modeling

### LSTM

In [23]:
pip install tensorflow scikit-learn pandas matplotlib

Note: you may need to restart the kernel to use updated packages.


In [42]:
import numpy as np
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt

In [39]:
df = pd.read_csv("transform_readings.csv")

In [40]:
df.head()

Unnamed: 0,gravity,temperature,timestamp,brix
0,1.01574,30.1875,2025-10-06T16:08:45.513729,4.012791
1,1.014763,30.1875,2025-10-06T16:08:58.411916,3.766628
2,1.013954,30.125,2025-10-06T16:09:10.999436,3.562475
3,1.013093,30.0625,2025-10-06T16:09:23.931160,3.344882
4,1.013142,30.0625,2025-10-06T16:09:39.360793,3.357274


In [41]:
# Convert Brix to Specific Gravity
df['gravity'] = (df['brix'] / (258.6 - ((df['brix'] / 258.2) * 227.1))) + 1

# Compute Alcohol Content using your given formula
df['Alcohol_Content'] = (((182.4601 * df['gravity'] - 775.6821) * df['gravity'] + 1262.7794) * df['gravity'] - 669.5622)

# Display new columns
df[['timestamp', 'brix', 'temperature', 'Alcohol_Content']].head()

Unnamed: 0,timestamp,brix,temperature,Alcohol_Content
0,2025-10-06T16:08:45.513729,4.012791,30.1875,4.010799
1,2025-10-06T16:08:58.411916,3.766628,30.1875,3.764479
2,2025-10-06T16:09:10.999436,3.562475,30.125,3.560194
3,2025-10-06T16:09:23.931160,3.344882,30.0625,3.34246
4,2025-10-06T16:09:39.360793,3.357274,30.0625,3.35486
