In [None]:
# Project layout:
# crocodile-linear-regression/
#   crocodile_dataset.csv
#   main.py
#   requirements.txt
#   README.md

In [None]:
import joblib
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.model_selection import train_test_split

In [None]:
# Read the crocodile observations; the path is resolved relative to the
# current working directory.
csv_path = "crocodile_dataset.csv"
df = pd.read_csv(csv_path)

# Quick sanity report: row/column counts and the available column names.
column_names = df.columns.tolist()
print("Dataset Loaded Successfully ✅")
print("Shape:", df.shape)
print("\nColumns:", column_names)

In [None]:
# Model weight as a function of length; rows missing either value are dropped.
length_col = "Observed Length (m)"
weight_col = "Observed Weight (kg)"
df = df[[length_col, weight_col]].dropna()

X = df[[length_col]]  # Independent variable, kept 2-D (single-column frame)
y = df[weight_col]    # Dependent variable, 1-D series

In [None]:
# Hold out 20% of the rows for evaluation; the fixed seed keeps the split
# reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [None]:
# Fit a linear regression on the training split. `fit` returns the estimator
# itself, so construction and fitting can be chained.
model = LinearRegression().fit(X_train, y_train)
model  # last expression: show the fitted estimator as the cell output

In [None]:
# Predict weights for the held-out lengths; used for evaluation below.
y_pred = model.predict(X=X_test)

In [None]:
# Score the held-out predictions against the observed weights.
mse = mean_squared_error(y_true=y_test, y_pred=y_pred)
r2 = r2_score(y_true=y_test, y_pred=y_pred)

# Report both metrics rounded to two decimals.
print("\nModel Performance 📊")
print(f"Mean Squared Error: {mse:.2f}")
print(f"R² Score: {r2:.2f}")


In [None]:
# Scatter every observation (train and test) and overlay the fitted line,
# using the explicit Figure/Axes interface.
fig, ax = plt.subplots(figsize=(8, 6))
ax.scatter(X, y, color='blue', label='Actual Data')
ax.plot(X, model.predict(X), color='red', linewidth=2, label='Regression Line')
ax.set_title("Crocodile Weight Prediction")
ax.set_xlabel("Observed Length (m)")
ax.set_ylabel("Observed Weight (kg)")
ax.legend()
ax.grid(True)
plt.show()

In [None]:
# Predict the weight of a single crocodile from its length (in metres).
sample_length = 3.5
# The model was fitted on a DataFrame with a named column, so the query is
# wrapped in a one-row DataFrame carrying the same column name. Passing a bare
# nested list makes scikit-learn warn that X has no valid feature names.
sample = pd.DataFrame({"Observed Length (m)": [sample_length]})
predicted_weight = model.predict(sample)[0]
print(f"\nPredicted weight for crocodile of length {sample_length} m: {predicted_weight:.2f} kg")


In [None]:
# Persist the fitted model to disk. The output path is defined once so the
# file written and the message printed cannot drift apart. `joblib` is
# imported in the notebook's import cell alongside the other dependencies.
model_path = "crocodile_weight_model.pkl"
joblib.dump(model, model_path)
print(f"\nModel saved as {model_path} ✅")