In [3]:
import pandas as pd
import sqlalchemy
from sklearn.model_selection import train_test_split
from sklearn.multioutput import MultiOutputRegressor
from sklearn.ensemble import RandomForestRegressor
from sklearn.preprocessing import OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
import pickle

# Database connection
engine = sqlalchemy.create_engine("mysql+pymysql://root:Omkar12345@localhost:3306/samplecheck")

# Load data
df_wb = pd.read_sql_table("weighbridge", engine)
df_cs = pd.read_sql_table("coalsample", engine)
df_td = pd.read_sql_table("transition_delay", engine)

# Create weight variation column
df_wb['weight_variation'] = df_wb['expected_weight_MT'] - df_wb['challan_quantity_MT']

# Merge dataframes
df = pd.merge(df_cs, df_wb[['entry_id', 'weight_variation']], on='entry_id')
df = pd.merge(df, df_td[['entry_id', 'delay_minutes']], on='entry_id')

# Prepare features and labels
X = df[['vehicle_no', 't_id', 'weight_variation', 'delay_minutes']]
y = df[['moisture', 'gcv']]

# Preprocessing pipeline
preprocessor = ColumnTransformer(
    transformers=[
        ('cat', OneHotEncoder(), ['vehicle_no', 't_id'])
    ],
    remainder='passthrough' 
)

# Create and train the model
model = Pipeline(steps=[
    ('preprocessor', preprocessor),
    ('regressor', MultiOutputRegressor(RandomForestRegressor()))
])

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
model.fit(X_train, y_train)

# Save the model
with open('coal_quality_prediction_model.pkl', 'wb') as file:
    pickle.dump(model, file)

print("Model trained and saved successfully.")


TypeError: Feature names are only supported if all input features have string names, but your input has ['quoted_name', 'str'] as feature name / column name types. If you want feature names to be stored and validated, you must convert them all to strings, by using X.columns = X.columns.astype(str) for example. Otherwise you can remove feature / column names from your input data, or convert them all to a non-string data type.

model.predict(["OD23C8890","BXN214",2.0,90])

In [None]:
model.predict(["OD23C8890","BXN214","2.0","90"])

ValueError: Expected 2D array, got 1D array instead:
array=['OD23C8890' 'BXN214' '2.0' '90'].
Reshape your data either using array.reshape(-1, 1) if your data has a single feature or array.reshape(1, -1) if it contains a single sample.