SVM

In [None]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, mean_absolute_error
from sklearn.preprocessing import OneHotEncoder
from sklearn.svm import SVR

file_path = r"D:\Inzpire-Solutions\Training\merged_data_original_transformed.csv"
try:
    data = pd.read_csv(file_path)
    print(data.info())
    print(data.head())
except Exception as e:
    print(f"Error loading data: {e}")

categorical_columns = ['Crate']

encoder = OneHotEncoder(sparse_output=False, handle_unknown='ignore')
try:
    encoded_columns = encoder.fit_transform(data[categorical_columns])
    encoded_column_names = encoder.get_feature_names_out(categorical_columns)
    encoded_df = pd.DataFrame(encoded_columns, columns=encoded_column_names, index=data.index)
    data = pd.concat([data.drop(categorical_columns, axis=1), encoded_df], axis=1)
except Exception as e:
    print(f"Error in one-hot encoding: {e}")


X = data.drop(['GrossWeight',"_id"], axis=1)
y = data["GrossWeight"]

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

model = SVR(kernel='linear', C=8, epsilon=0.09)

try:
    model.fit(X_train, y_train)
except Exception as e:
    print(f"Error in training model: {e}")

try:
    y_pred = model.predict(X_test)
    rmse = np.sqrt(mean_squared_error(y_test, y_pred))
    mae = mean_absolute_error(y_test, y_pred)

    print(f"Root Mean Squared Error (RMSE): {rmse}")
    print(f"Mean Absolute Error (MAE): {mae}")
except Exception as e:
    print(f"Error in prediction or evaluation: {e}")

In [None]:
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, mean_absolute_error
from sklearn.preprocessing import OneHotEncoder, StandardScaler

file_path = "/content/merged_data_original_transformed.csv"
try:
    data = pd.read_csv(file_path)
    print(data.info())
    print(data.head())
except Exception as e:
    print(f"Error loading data: {e}")

categorical_columns = ['Crate']
encoder = OneHotEncoder(sparse_output=False, handle_unknown='ignore')
try:
    encoded_columns = encoder.fit_transform(data[categorical_columns])
    encoded_column_names = encoder.get_feature_names_out(categorical_columns)
    encoded_df = pd.DataFrame(encoded_columns, columns=encoded_column_names, index=data.index)
    data = pd.concat([data.drop(categorical_columns, axis=1), encoded_df], axis=1)
except Exception as e:
    print(f"Error in one-hot encoding: {e}")

X = data.drop(['GrossWeight', '_id'], axis=1)
y = data['GrossWeight']

scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.2, random_state=42)

class TransformerRegressor(nn.Module):
    def __init__(self, input_dim, embed_dim, num_heads, num_layers):
        super(TransformerRegressor, self).__init__()
        self.embedding = nn.Linear(input_dim, embed_dim)
        encoder_layer = nn.TransformerEncoderLayer(d_model=embed_dim, nhead=num_heads)
        self.transformer = nn.TransformerEncoder(encoder_layer, num_layers=num_layers)
        self.fc = nn.Linear(embed_dim, 1)

    def forward(self, x):
        x = self.embedding(x)
        x = self.transformer(x.unsqueeze(1))
        x = self.fc(x[:, 0, :])
        return x

input_dim = X_train.shape[1]
embed_dim = 64
num_heads = 4
num_layers = 2
model = TransformerRegressor(input_dim, embed_dim, num_heads, num_layers)

criterion = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)

X_train_tensor = torch.tensor(X_train, dtype=torch.float32)
X_test_tensor = torch.tensor(X_test, dtype=torch.float32)
y_train_tensor = torch.tensor(y_train.values, dtype=torch.float32)
y_test_tensor = torch.tensor(y_test.values, dtype=torch.float32)

num_epochs = 500
for epoch in range(num_epochs):
    model.train()
    optimizer.zero_grad()
    outputs = model(X_train_tensor)
    loss = criterion(outputs.squeeze(), y_train_tensor)
    loss.backward()
    optimizer.step()
    print(f"Epoch {epoch+1}/{num_epochs}, Loss: {loss.item():.4f}")

model.eval()
with torch.no_grad():
    y_pred_tensor = model(X_test_tensor).squeeze()
    y_pred = y_pred_tensor.numpy()

rmse = np.sqrt(mean_squared_error(y_test, y_pred))
mae = mean_absolute_error(y_test, y_pred)
print(f"Root Mean Squared Error (RMSE): {rmse}")
print(f"Mean Absolute Error (MAE): {mae}")

Epoch 499/500, Loss: 5330.1343

Epoch 500/500, Loss: 5329.8823

Root Mean Squared Error (RMSE): 18.05666188994635

Mean Absolute Error (MAE): 15.878791025855161

In [2]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split, RandomizedSearchCV
from sklearn.metrics import mean_squared_error, mean_absolute_error
from sklearn.ensemble import RandomForestRegressor
from sklearn.preprocessing import OneHotEncoder, StandardScaler
import warnings

warnings.filterwarnings('ignore')

file_path = r"D:\Inzpire-Solutions\Training\merged_data_original_transformed.csv"
try:
    data = pd.read_csv(file_path)
    print(data.info())
    print(data.head())
except Exception as e:
    print(f"Error loading data: {e}")

data.columns = data.columns.astype(str)

categorical_columns = ['Crate']
encoder = OneHotEncoder(sparse_output=True, handle_unknown='ignore')
try:
    encoded_columns = encoder.fit_transform(data[categorical_columns])
    encoded_column_names = encoder.get_feature_names_out(categorical_columns)
    encoded_df = pd.DataFrame.sparse.from_spmatrix(encoded_columns, index=data.index, columns=encoded_column_names)
    data = pd.concat([data.drop(categorical_columns, axis=1), encoded_df], axis=1)
except Exception as e:
    print(f"Error in one-hot encoding: {e}")

X = data.drop(['GrossWeight', '_id'], axis=1)
y = data['GrossWeight']

scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.2, random_state=42)

rf = RandomForestRegressor(random_state=42)

param_dist = {
    'n_estimators': [100, 200, 300],
    'max_depth': [None, 10, 20, 30],
    'min_samples_split': [2, 5, 10],
    'min_samples_leaf': [1, 2, 4],
    'max_features': ['auto', 'sqrt'],
}

random_search = RandomizedSearchCV(
    rf,
    param_distributions=param_dist,
    n_iter=20,
    cv=3,
    scoring='neg_mean_squared_error',
    random_state=42,
    n_jobs=-1,
    verbose=1,
)

try:
    random_search.fit(X_train, y_train)
    best_rf = random_search.best_estimator_
    print("Best Parameters for Random Forest:", random_search.best_params_)
except Exception as e:
    print(f"Error in Random Forest hyperparameter tuning: {e}")

try:
    y_pred_rf = best_rf.predict(X_test)
    rmse_rf = np.sqrt(mean_squared_error(y_test, y_pred_rf))
    mae_rf = mean_absolute_error(y_test, y_pred_rf)
    print(f"Random Forest - Root Mean Squared Error (RMSE): {rmse_rf}")
    print(f"Random Forest - Mean Absolute Error (MAE): {mae_rf}")
except Exception as e:
    print(f"Error in model evaluation: {e}")


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3297 entries, 0 to 3296
Columns: 187 entries, _id to GM_40kg
dtypes: float64(185), int64(1), object(1)
memory usage: 4.7+ MB
None
                        _id  Crate  NetWeight  GrossWeight  KMR_1.5kg  \
0  66cdbe64547097decd66003e  20259      5.562     6.500000        1.0   
1  66cdbeaa547097decd660042  10067      2.652     6.600000        0.0   
2  66ed1639abaa2c70c8da8974  40021     28.602    36.599998        0.0   
3  66ed163fabaa2c70c8da8977  20293     26.536    34.599998        0.0   
4  66ed18d5abaa2c70c8da8a39  40098     29.986    37.950001        0.0   

   MUT_2kg  PR_2kg  KAB_1kg  MS_0.5kg  NM_1kg  ...  BI_1.5kg  SID_1kg  \
0      1.0     1.0      0.0       0.0     0.0  ...       0.0      0.0   
1      0.0     0.0      1.0       1.0     1.0  ...       0.0      0.0   
2      0.0     0.0      3.0       4.0     5.0  ...       0.0      0.0   
3      0.0     0.0      4.0       4.0     2.0  ...       0.0      0.0   
4      0.0     0

In [3]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split, RandomizedSearchCV
from sklearn.metrics import mean_squared_error, mean_absolute_error
from sklearn.ensemble import RandomForestRegressor, StackingRegressor
from sklearn.linear_model import Ridge
from sklearn.preprocessing import OneHotEncoder, StandardScaler
import xgboost as xgb
import warnings

warnings.filterwarnings('ignore')

file_path = r"D:\Inzpire-Solutions\Training\merged_data_original_transformed.csv"
try:
    data = pd.read_csv(file_path)
    print(data.info())
    print(data.head())
except Exception as e:
    print(f"Error loading data: {e}")

data.columns = data.columns.astype(str)

categorical_columns = ['Crate']
encoder = OneHotEncoder(sparse_output=True, handle_unknown='ignore')
try:
    encoded_columns = encoder.fit_transform(data[categorical_columns])
    encoded_column_names = encoder.get_feature_names_out(categorical_columns)
    encoded_df = pd.DataFrame.sparse.from_spmatrix(encoded_columns, index=data.index, columns=encoded_column_names)
    data = pd.concat([data.drop(categorical_columns, axis=1), encoded_df], axis=1)
except Exception as e:
    print(f"Error in one-hot encoding: {e}")

X = data.drop(['GrossWeight', '_id'], axis=1)
y = data['GrossWeight']

scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.2, random_state=42)

rf_model = RandomForestRegressor(n_estimators=200, max_depth=30, min_samples_split=10, min_samples_leaf=2, random_state=42)
xgb_model = xgb.XGBRegressor(n_estimators=300, max_depth=10, learning_rate=0.1, random_state=42)

stacking_model = StackingRegressor(
    estimators=[('rf', rf_model), ('xgb', xgb_model)],
    final_estimator=Ridge()
)

stacking_model.fit(X_train, y_train)

y_pred_stacked = stacking_model.predict(X_test)
rmse_stacked = np.sqrt(mean_squared_error(y_test, y_pred_stacked))
mae_stacked = mean_absolute_error(y_test, y_pred_stacked)
print(f"Stacked Model - RMSE: {rmse_stacked}")
print(f"Stacked Model - MAE: {mae_stacked}")

param_dist_rf = {
    'n_estimators': [100, 200, 300, 500, 1000],
    'max_depth': [None, 10, 20, 30, 40, 50],
    'min_samples_split': [2, 5, 10, 20],
    'min_samples_leaf': [1, 2, 4, 8],
    'max_features': ['auto', 'sqrt', 'log2'],
}

param_dist_xgb = {
    'n_estimators': [100, 200, 300],
    'max_depth': [3, 6, 10, 15],
    'learning_rate': [0.01, 0.1, 0.3],
    'subsample': [0.6, 0.8, 1.0],
    'colsample_bytree': [0.6, 0.8, 1.0],
}

random_search_rf = RandomizedSearchCV(
    rf_model,
    param_distributions=param_dist_rf,
    n_iter=20,
    cv=3,
    scoring='neg_mean_squared_error',
    random_state=42,
    n_jobs=-1,
    verbose=1,
)

random_search_rf.fit(X_train, y_train)
best_rf = random_search_rf.best_estimator_
print("Best Parameters for Random Forest:", random_search_rf.best_params_)

random_search_xgb = RandomizedSearchCV(
    xgb_model,
    param_distributions=param_dist_xgb,
    n_iter=20,
    cv=3,
    scoring='neg_mean_squared_error',
    random_state=42,
    n_jobs=-1,
    verbose=1,
)

random_search_xgb.fit(X_train, y_train)
best_xgb = random_search_xgb.best_estimator_
print("Best Parameters for XGBoost:", random_search_xgb.best_params_)

y_pred_rf = best_rf.predict(X_test)
rmse_rf = np.sqrt(mean_squared_error(y_test, y_pred_rf))
mae_rf = mean_absolute_error(y_test, y_pred_rf)
print(f"Random Forest - RMSE: {rmse_rf}")
print(f"Random Forest - MAE: {mae_rf}")

y_pred_xgb = best_xgb.predict(X_test)
rmse_xgb = np.sqrt(mean_squared_error(y_test, y_pred_xgb))
mae_xgb = mean_absolute_error(y_test, y_pred_xgb)
print(f"XGBoost - RMSE: {rmse_xgb}")
print(f"XGBoost - MAE: {mae_xgb}")


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3297 entries, 0 to 3296
Columns: 187 entries, _id to GM_40kg
dtypes: float64(185), int64(1), object(1)
memory usage: 4.7+ MB
None
                        _id  Crate  NetWeight  GrossWeight  KMR_1.5kg  \
0  66cdbe64547097decd66003e  20259      5.562     6.500000        1.0   
1  66cdbeaa547097decd660042  10067      2.652     6.600000        0.0   
2  66ed1639abaa2c70c8da8974  40021     28.602    36.599998        0.0   
3  66ed163fabaa2c70c8da8977  20293     26.536    34.599998        0.0   
4  66ed18d5abaa2c70c8da8a39  40098     29.986    37.950001        0.0   

   MUT_2kg  PR_2kg  KAB_1kg  MS_0.5kg  NM_1kg  ...  BI_1.5kg  SID_1kg  \
0      1.0     1.0      0.0       0.0     0.0  ...       0.0      0.0   
1      0.0     0.0      1.0       1.0     1.0  ...       0.0      0.0   
2      0.0     0.0      3.0       4.0     5.0  ...       0.0      0.0   
3      0.0     0.0      4.0       4.0     2.0  ...       0.0      0.0   
4      0.0     0