In [13]:
import os
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_absolute_error

# Set Pandas to display numbers without scientific notation
pd.set_option('display.float_format', '{:.3f}'.format)

# Step 1: Load and Merge All Files
cleaned_data_folder = 'pdata'  #1 Cleaned data
normalized_data_folder = 'ldata' #2 Normalized data

all_clean_files = [f for f in os.listdir(cleaned_data_folder) if f.endswith('.csv')]


In [14]:
i=0

for file in all_clean_files:
    print(f"\rProcessing file {i}", end='', flush=True)
    i=i+1
    df = pd.read_csv(os.path.join(cleaned_data_folder, file))
    
    data=df
    #display(data)

    # Get the value from row 0, column 1 (which is 'Unix_time_Stamp' column, index 2)
    value_to_subtract = data.iloc[0, 1]

    #print(f"Value to subtract: {value_to_subtract}")

    # Subtract this value from the entire 'Unix_time_Stamp' column
    data['Unix_time_Stamp'] = (data['Unix_time_Stamp'] - value_to_subtract)/1800
    
    # Rename the last column to 'Weight'
    #print(data.columns.values[-1])# = 'Weight'

    # Get the name of the last column
    last_column_name = data.columns[-1]

    # Rename the last column to 'Weight'
    data = data.rename(columns={last_column_name: 'Weight'})

    # Get the value from row 0, column 5 (which is 'Weight' column, index 2)
    value_to_normalize = data.iloc[0, 5]

    #print(f"Value to value_to_normalize: {value_to_normalize}")

    # Subtract this value from the entire 'Unix_time_Stamp' column
    data['Weight'] = (data['Weight'])/value_to_normalize

    # Drop the 3rd column by name
    data = data.drop('Date', axis=1)

    # Rename 'Unix_time_Stamp' to 'time'
    data = data.rename(columns={'Unix_time_Stamp': 'Time'})

    #print(file)

    # Construct the new filename by splitting the original and appending '_norm'
    base_name, ext = os.path.splitext(file)  # Split into name and extension
    new_filename = f"{base_name}_norm{ext}"  # Append '_norm' to the base name

    # Create the full path for saving
    full_path = os.path.join(normalized_data_folder, new_filename)

    # Write the DataFrame to the new CSV file
    data.to_csv(full_path, index=False)  # index=False to avoid writing row indices

    #print(f"DataFrame written to {full_path}")
    #display(data)

    
    
    if i>100:
        break
    #


Processing file 71

In [15]:

all_normalized_files = [f for f in os.listdir(normalized_data_folder) if f.endswith('.csv')]

del data

dfs = []
for file in all_normalized_files:
    df = pd.read_csv(os.path.join(normalized_data_folder, file))
    dfs.append(df)
    
# Merge all the dataframes into one
data = pd.concat(dfs, ignore_index=True)

# Convert data types if necessary
data['Time'] = data['Time'].astype(float)
data['Temp'] = data['Temp'].astype(float)
data['Humidity'] = data['Humidity'].astype(float)
data['Weight'] = data['Weight'].astype(float)

# Display the modified DataFrame
display(data)
#display(data)

Unnamed: 0,Jerky_ID,Time,Temp,Humidity,Weight
0,01,0.000,22.100,30.700,1.000
1,01,1.000,22.100,29.100,0.996
2,01,2.000,21.100,29.600,0.993
3,01,3.000,20.600,29.800,0.990
4,01,4.000,20.200,30.500,0.987
...,...,...,...,...,...
48008,95,184.000,18.600,26.600,0.637
48009,95,185.000,18.600,26.600,0.637
48010,95,186.000,18.700,26.600,0.637
48011,95,187.000,18.800,26.900,0.637


In [16]:
# Create features based on the current time
def create_features(df):
    df['Weight_Lag_1'] = df['Weight'].shift(1)  # Previous weight
    df['Weight_Lag_2'] = df['Weight'].shift(2)  
    df['Weight_Lag_3'] = df['Weight'].shift(3)  
    df['Weight_Lag_4'] = df['Weight'].shift(4)  
    df['Weight_Lag_5'] = df['Weight'].shift(5)  
    df['Weight_Lag_6'] = df['Weight'].shift(6)  
    df['Weight_Lag_7'] = df['Weight'].shift(7) 
    df['Weight_Lag_8'] = df['Weight'].shift(8) 
    df['Weight_Lag_9'] = df['Weight'].shift(9) 
    df['Weight_Lag_10'] = df['Weight'].shift(10) 
    # You can add more lag features if necessary
    return df

data = create_features(data)
data.dropna(inplace=True)  # Drop rows with NaN values

#print(data.head(20))
display(data)


Unnamed: 0,Jerky_ID,Time,Temp,Humidity,Weight,Weight_Lag_1,Weight_Lag_2,Weight_Lag_3,Weight_Lag_4,Weight_Lag_5,Weight_Lag_6,Weight_Lag_7,Weight_Lag_8,Weight_Lag_9,Weight_Lag_10
10,01,10.000,20.000,30.200,0.970,0.972,0.974,0.978,0.980,0.984,0.987,0.990,0.993,0.996,1.000
11,01,11.000,19.800,30.500,0.966,0.970,0.972,0.974,0.978,0.980,0.984,0.987,0.990,0.993,0.996
12,01,12.000,19.600,30.500,0.963,0.966,0.970,0.972,0.974,0.978,0.980,0.984,0.987,0.990,0.993
13,01,13.000,20.000,30.000,0.961,0.963,0.966,0.970,0.972,0.974,0.978,0.980,0.984,0.987,0.990
14,01,14.000,20.000,29.900,0.959,0.961,0.963,0.966,0.970,0.972,0.974,0.978,0.980,0.984,0.987
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
48008,95,184.000,18.600,26.600,0.637,0.637,0.643,0.637,0.643,0.643,0.643,0.643,0.643,0.643,0.643
48009,95,185.000,18.600,26.600,0.637,0.637,0.637,0.643,0.637,0.643,0.643,0.643,0.643,0.643,0.643
48010,95,186.000,18.700,26.600,0.637,0.637,0.637,0.637,0.643,0.637,0.643,0.643,0.643,0.643,0.643
48011,95,187.000,18.800,26.900,0.637,0.637,0.637,0.637,0.637,0.643,0.637,0.643,0.643,0.643,0.643


In [17]:
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error

# Define your features and target variable
features = data[['Time', 'Temp', 'Humidity', 'Weight_Lag_1', 'Weight_Lag_2', 'Weight_Lag_3', 'Weight_Lag_4', 'Weight_Lag_5','Weight_Lag_6','Weight_Lag_7','Weight_Lag_8','Weight_Lag_9','Weight_Lag_10']]
target = data['Weight']

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(features, target, test_size=0.2, random_state=42)

# Initialize and train the model
model = RandomForestRegressor(n_estimators=100, random_state=42)
model.fit(X_train, y_train)

# Evaluate the model
predictions = model.predict(X_test)
mse = mean_squared_error(y_test, predictions)
print(f'Mean Squared Error: {mse}')


Mean Squared Error: 1.820742889257355e-06


In [18]:
def predict_future_weight(time, temp, humidity, previous_weight):
    # Create a DataFrame for the input
    input_data = pd.DataFrame({
        'Time': [time],
        'Temp': [temp],
        'Humidity': [humidity],
        'Weight_Lag_1': [previous_weight],
        'Weight_Lag_2': [previous_weight],
        'Weight_Lag_3': [previous_weight],
        'Weight_Lag_4': [previous_weight],
        'Weight_Lag_5': [previous_weight],
        'Weight_Lag_6': [previous_weight],
        'Weight_Lag_7': [previous_weight],
        'Weight_Lag_8': [previous_weight],
        'Weight_Lag_9': [previous_weight],
        'Weight_Lag_10': [previous_weight]
            # You might want to adjust this
    })
    
    return model.predict(input_data)

# # Example usage
# future_time = 1  # Future time in seconds
# predicted_weight = predict_future_weight(future_time, 18.2, 40.3, 1)
# print(f'Predicted Weight at time 0: {predicted_weight[0]*1000}')

predicted_weight=1
# Loop through future time values from x to xx
for future_time in range(1, 600):  # This will iterate from 20 to 100 inclusive
    predicted_weight = predict_future_weight(future_time, 22, 40 ,predicted_weight)
    if predicted_weight<1:
        print(f'Future Time: {future_time/48:.1f} days, Predicted Weight: {predicted_weight[0] * 1000:.6f}')

        #break
    elif future_time == 599:
        print(f'Did not cure Future Time: {future_time/48:.1f} days, Predicted Weight: {predicted_weight[0] * 1000:.6f}')


Future Time: 0.0 days, Predicted Weight: 993.249228
Future Time: 0.0 days, Predicted Weight: 990.502505
Future Time: 0.1 days, Predicted Weight: 987.705481
Future Time: 0.1 days, Predicted Weight: 984.862310
Future Time: 0.1 days, Predicted Weight: 982.094367
Future Time: 0.1 days, Predicted Weight: 979.985999
Future Time: 0.1 days, Predicted Weight: 977.465089
Future Time: 0.2 days, Predicted Weight: 972.621736
Future Time: 0.2 days, Predicted Weight: 970.798100
Future Time: 0.2 days, Predicted Weight: 967.621464
Future Time: 0.2 days, Predicted Weight: 964.755798
Future Time: 0.2 days, Predicted Weight: 960.740870
Future Time: 0.3 days, Predicted Weight: 957.421937
Future Time: 0.3 days, Predicted Weight: 954.873648
Future Time: 0.3 days, Predicted Weight: 951.438860
Future Time: 0.3 days, Predicted Weight: 947.735508
Future Time: 0.4 days, Predicted Weight: 945.158043
Future Time: 0.4 days, Predicted Weight: 941.493688
Future Time: 0.4 days, Predicted Weight: 938.497473
Future Time: