In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report


In [None]:
# Upload 'Marine_Fish_Data.csv' to the working directory before running this cell.
# Load the raw dataset; the path is relative to the notebook's working directory.
data = pd.read_csv('Marine_Fish_Data.csv')

# Distinct raw values of each categorical column, taken straight from the file.
species_names = data['Species_Name'].unique()
region_names = data['Region'].unique()
breeding_season_names = data['Breeding_Season'].unique()
fishing_method_names = data['Fishing_Method'].unique()
water_pollution_level_names = data['Water_Pollution_Level'].unique()

# Print one array per column, in the order listed above.
for unique_labels in (
    species_names,
    region_names,
    breeding_season_names,
    fishing_method_names,
    water_pollution_level_names,
):
    print(unique_labels)

['Salmon' 'Tuna' 'Cod' 'Herring' 'Mackerel' 'Sardine' 'Shark' 'Snapper']
['North Atlantic' 'Pacific Ocean' 'Mediterranean Sea' 'Indian Ocean']
['Summer' 'Monsoon' 'Winter']
['Net' 'Line' 'Trawl']
['High' 'Medium' 'Low']


In [None]:
# Step 2: Encode categorical variables
# Each listed column of `data` is replaced by integer codes, and the fitted
# encoder for every column is kept in `label_encoders`.
# NOTE: `data` is modified in place, so re-running this cell on its own fits
# the encoders on the integer codes instead of the original values.
categorical_columns = ['Species_Name', 'Region', 'Breeding_Season',
                       'Fishing_Method', 'Water_Pollution_Level']

# Fit one encoder per column first, then overwrite the columns with their codes.
label_encoders = {name: LabelEncoder().fit(data[name]) for name in categorical_columns}
for name, encoder in label_encoders.items():
    data[name] = encoder.transform(data[name])

In [None]:
# Step 3: Define features (X) and target (y)
feature_columns = [
    'Species_Name', 'Region', 'Breeding_Season', 'Fishing_Method',
    'Fish_Population', 'Average_Size(cm)', 'Water_Temperature(C)',
    'Water_Pollution_Level',
]
X = data[feature_columns]

# Binary target: 1 where Overfishing_Risk is exactly 'Yes', 0 for any other value.
y = data['Overfishing_Risk'].eq('Yes').astype('int64')


In [None]:
# Step 4: Split the data
# Hold out 20% of the rows for testing; the fixed seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)


In [None]:
# Step 5: Standardize the numerical data
# The scaler is fitted on the training rows only and then applied, unchanged,
# to both the training and the test partition.
# NOTE: X_train / X_test are overwritten with the scaled arrays, so re-run the
# split cell before re-running this one; otherwise the scaler would be refit
# on values that are already scaled.
scaler = StandardScaler().fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)


In [None]:
# Step 6: Train the Random Forest model
# 100 trees, with a fixed seed so the run is repeatable.
forest_params = {'n_estimators': 100, 'random_state': 42}
model = RandomForestClassifier(**forest_params)
model.fit(X_train, y_train)

In [None]:
# Persist the trained model and the fitted scaler as pickle files; the scaler
# is saved so future inputs can be transformed the same way before prediction.
import pickle

for pickle_path, fitted_object in (
    ('random_forest_model.pkl', model),
    ('scaler.pkl', scaler),
):
    with open(pickle_path, 'wb') as file:
        pickle.dump(fitted_object, file)

In [None]:
# Step 7: Evaluate the model
# Predict the held-out test rows and compare against their true labels.
y_pred = model.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
report = classification_report(y_test, y_pred)

print("Model Accuracy:", accuracy)
print("\nClassification Report:\n", report)

Model Accuracy: 0.55

Classification Report:
               precision    recall  f1-score   support

           0       0.62      0.54      0.58        57
           1       0.48      0.56      0.52        43

    accuracy                           0.55       100
   macro avg       0.55      0.55      0.55       100
weighted avg       0.56      0.55      0.55       100



In [None]:
# Preview the first five rows of `data` as it stands at this point in the notebook.
data.head(n=5)

Unnamed: 0,Species_Name,Region,Breeding_Season,Fishing_Method,Fish_Population,Average_Size(cm),Overfishing_Risk,Water_Temperature(C),Water_Pollution_Level
0,3,2,1,1,8270,71.92,No,24.68,0
1,7,3,0,0,1860,85.33,No,21.33,0
2,0,1,1,0,6390,88.02,No,16.98,2
3,1,3,0,2,6191,85.46,Yes,28.51,1
4,1,0,0,2,6734,48.35,Yes,25.37,1


In [None]:
# Step 8: Take user input for prediction
def _read_category(prompt, column):
    """Prompt for a categorical feature and return its integer code.

    The reply may be the integer code itself or the original label (for
    example a species name). Labels are translated with the encoder stored
    for `column` in `label_encoders`.

    Args:
        prompt: Text shown to the user.
        column: Key into `label_encoders` for the feature being read.

    Returns:
        The integer code for the reply.

    Raises:
        ValueError: If the reply is neither an integer nor a label that the
            encoder accepts.
    """
    reply = input(prompt)
    try:
        return int(reply)
    except ValueError:
        # Not a number: treat the reply as a label and look up its code.
        return int(label_encoders[column].transform([reply.strip()])[0])


print("\n--- Enter Details for Overfishing Risk Prediction ---")

# The prompts ask for encoded values, so show which code stands for which label.
for column, encoder in label_encoders.items():
    legend = ", ".join(f"{code} = {label}" for code, label in enumerate(encoder.classes_))
    print(f"{column}: {legend}")

species_name = _read_category("Enter Species Name (encoded): ", 'Species_Name')
region = _read_category("Enter Region (encoded): ", 'Region')
breeding_season = _read_category("Enter Breeding Season (encoded): ", 'Breeding_Season')
fishing_method = _read_category("Enter Fishing Method (encoded): ", 'Fishing_Method')
fish_population = int(input("Enter Fish Population: "))
avg_size = float(input("Enter Average Size (cm): "))
water_temp = float(input("Enter Water Temperature (°C): "))
water_pollution = _read_category("Enter Water Pollution Level (encoded): ", 'Water_Pollution_Level')


--- Enter Details for Overfishing Risk Prediction ---
Enter Species Name (encoded): 1
Enter Region (encoded): 2
Enter Breeding Season (encoded): 1
Enter Fishing Method (encoded): 0
Enter Fish Population: 8270
Enter Average Size (cm): 71.92
Enter Water Temperature (°C): 24.68
Enter Water Pollution Level (encoded): 0


In [None]:
# Assemble the answers into a one-row frame whose columns are named and ordered
# like the training features.
user_features = {
    'Species_Name': species_name,
    'Region': region,
    'Breeding_Season': breeding_season,
    'Fishing_Method': fishing_method,
    'Fish_Population': fish_population,
    'Average_Size(cm)': avg_size,
    'Water_Temperature(C)': water_temp,
    'Water_Pollution_Level': water_pollution,
}
user_input = pd.DataFrame([user_features])

# Apply the already-fitted scaler, then classify the scaled row.
user_input = scaler.transform(user_input)
prediction = model.predict(user_input)

In [None]:
# Step 10: Display prediction result
# Only the first element of `prediction` is inspected; 1 is reported as the 'Yes' class.
risk_is_high = prediction[0] == 1
message = (
    "Prediction: Overfishing Risk is HIGH (Yes)."
    if risk_is_high
    else "Prediction: Overfishing Risk is LOW (No)."
)
print(message)

Prediction: Overfishing Risk is HIGH (Yes).


In [None]:
# Distinct values currently stored in the Species_Name column.
# (By this point the column holds integer codes rather than species names.)
species_column = data['Species_Name']
species_names = species_column.unique()
print(species_names)

[3 7 0 1 2 4 5 6]


In [None]:
# Persist everything needed to reproduce predictions later: the trained model,
# the fitted scaler, and the per-column label encoders.
import pickle

artifacts = {
    'random_forest_model.pkl': model,
    'scaler.pkl': scaler,
    'label_encoders.pkl': label_encoders,
}
for pickle_path, fitted_object in artifacts.items():
    # One `with` per file, so each handle is closed before the next one is opened.
    with open(pickle_path, 'wb') as file:
        pickle.dump(fitted_object, file)
