In [2]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score

# Read the Fish dataset from its hosted CSV; keep the untouched frame under its
# own name so the raw data stays available for inspection.
url = 'https://raw.githubusercontent.com/Seun999/datasets/main/Fish.csv'
fish_raw = pd.read_csv(url)

# Show how many values are missing in each column before any rows are removed.
print(fish_raw.isna().sum())

# Discard rows containing any missing value, then replace the 'Species' column
# with one indicator column per species (named 'Species_<name>').
df = pd.get_dummies(fish_raw.dropna(), columns=['Species'])

# 'Weight' is the regression target (y); every other column is a feature (X).
y = df['Weight']
X = df.drop(columns='Weight')

# Hold out 20% of the rows for evaluation; the fixed random_state makes the
# split repeatable from run to run.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Fit a linear regression on the training portion (fit() returns the estimator).
model = LinearRegression().fit(X_train, y_train)

# Predict the held-out rows and score those predictions against the true weights.
y_pred = model.predict(X_test)
mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)

# Report both metrics, one per line.
for label, value in (("Mean Squared Error:", mse), ("R-squared:", r2)):
    print(label, value)

# Using the model for prediction
#
# Describe the fish by its raw measurements and its species name, and derive the
# one-hot layout from the columns the model was trained on instead of typing
# every indicator column by hand. scikit-learn checks that the columns passed to
# predict() match the training columns (names and order), so a hand-written list
# breaks as soon as the set of species in the data differs from it.
measurements = {
    'Length1': 32.4,
    'Length2': 35.0,
    'Length3': 38.5,
    'Height': 17.4,
    'Width': 11.0,
}
species = 'Bream'

# Start from the measurements and switch on the indicator for the chosen species.
sample = dict(measurements)
sample['Species_' + species] = 1

# Reject names the model never saw (misspelled measurement or unknown species);
# reindex() below would otherwise silently drop them.
unknown_columns = sorted(set(sample) - set(X_train.columns))
if unknown_columns:
    raise ValueError(
        "Columns not present in the training features: {}".format(unknown_columns)
    )

# Reject a missing measurement; reindex() below would otherwise silently fill it with 0.
missing_measurements = [
    column
    for column in X_train.columns
    if not column.startswith('Species_') and column not in sample
]
if missing_measurements:
    raise ValueError(
        "Missing measurements for prediction: {}".format(missing_measurements)
    )

# Align the single-row frame to the training columns: same names, same order,
# with every species indicator that was not set filled with 0.
new_data = pd.DataFrame([sample]).reindex(columns=X_train.columns, fill_value=0)

predicted_weight = model.predict(new_data)

print("Predicted Weight:", predicted_weight[0])


Species    0
Weight     0
Length1    0
Length2    0
Length3    0
Height     0
Width      0
dtype: int64
Mean Squared Error: 7007.383189853871
R-squared: 0.9507352480054517
Predicted Weight: 599.9449779889956
