In [1]:
import pandas as pd

# Load the fish measurements from the CSV file (path is relative to the
# notebook's working directory).
fish_data = pd.read_csv('Fish.csv')

# Print the column summary as its own statement: DataFrame.info() writes its
# report to stdout and returns None, so placing it in the cell's final
# expression would display a tuple ending in None.
fish_data.info()

# Preview the first few rows; as the last expression this is the cell output.
fish_data.head()


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 159 entries, 0 to 158
Data columns (total 7 columns):
 #   Column   Non-Null Count  Dtype  
---  ------   --------------  -----  
 0   Species  159 non-null    object 
 1   Weight   159 non-null    float64
 2   Length1  159 non-null    float64
 3   Length2  159 non-null    float64
 4   Length3  159 non-null    float64
 5   Height   159 non-null    float64
 6   Width    159 non-null    float64
dtypes: float64(6), object(1)
memory usage: 8.8+ KB


(  Species  Weight  Length1  Length2  Length3   Height   Width
 0   Bream   242.0     23.2     25.4     30.0  11.5200  4.0200
 1   Bream   290.0     24.0     26.3     31.2  12.4800  4.3056
 2   Bream   340.0     23.9     26.5     31.1  12.3778  4.6961
 3   Bream   363.0     26.3     29.0     33.5  12.7300  4.4555
 4   Bream   430.0     26.5     29.0     34.0  12.4440  5.1340,
 None)

In [2]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error

# Separate predictors from the target: every column except 'Weight' is a
# feature, and 'Weight' is the value the model learns to predict.
X = fish_data.drop('Weight', axis=1)
y = fish_data['Weight']

# One-hot encode the categorical 'Species' column and pass the remaining
# columns through unchanged.
# handle_unknown='ignore' makes the encoder emit an all-zero indicator row for
# a species it did not see during fit instead of raising at predict time; with
# a small dataset and a random split, a rare species can be absent from the
# training fold.
categorical_features = ['Species']
one_hot = OneHotEncoder(handle_unknown='ignore')
transformer = ColumnTransformer([('one_hot', one_hot, categorical_features)], remainder='passthrough')

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split
# reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Bundle preprocessing and the estimator into one pipeline so the encoder is
# fitted on the training rows only and the same transformation is applied
# automatically at predict time.
regression_model = Pipeline(steps=[('transformer', transformer),
                                   ('model', RandomForestRegressor(n_estimators=100, random_state=42))])

# Fit the encoder and the random forest on the training split
regression_model.fit(X_train, y_train)

# Score the held-out rows; mean squared error is expressed in squared units
# of 'Weight'.
y_pred = regression_model.predict(X_test)
mse = mean_squared_error(y_test, y_pred)

mse


4557.917397503473

In [3]:
import pickle

# Serialize the trained pipeline to disk with pickle.
model_file_path = 'fish_model.pkl'
with open(model_file_path, 'wb') as model_out:
    # Binary mode is required because pickle writes bytes, not text.
    pickle.dump(regression_model, model_out)

# Echo the destination path as the cell output
model_file_path


'fish_model.pkl'