<a href="https://colab.research.google.com/github/IndeewaAmarasinghe/RP-2024-25J-125-Crop-yield-prediction---Paddy-harvest-/blob/IT21227868-Rice-Variety-Recommendation/Support_Vector_Classifier_(SVC).ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
from google.colab import drive
import pandas as pd

# Mount Google Drive
drive.mount('/content/drive')

# Load the datasets
varieties_file_path = '/content/drive/MyDrive/Rice Genie/RiceVarietiesData.xlsx'
district_file_path = '/content/drive/MyDrive/Rice Genie/SriLankaDistricts.csv'

varieties_data = pd.read_excel(varieties_file_path, sheet_name='Sheet1')
district_data = pd.read_csv(district_file_path)

# Merge datasets using inner join on 'Recommendation'
combined_data = pd.merge(varieties_data, district_data, how='inner', on='Recommendation')

Mounted at /content/drive


In [None]:
def convert_maturity_to_numeric(value):
    if isinstance(value, str) and '-' in value:
        parts = value.split('-')
        return (int(parts[0]) + int(parts[1])) / 2
    try:
        return float(value)
    except ValueError:
        return np.nan

# Convert 'Maturity (days)' to numeric averages
combined_data['Maturity (days)'] = combined_data['Maturity (days)'].apply(convert_maturity_to_numeric)
combined_data.dropna(subset=['Maturity (days)'], inplace=True)


In [None]:
# Create a list of recommendation keywords/phrases that indicate suitability
recommendation_keywords = [
    "Low Country Wet Zone",
    "High potential area",
    "Rainfed areas",
    "Wet Zone",
    "Saline prone areas",
    "Northern region",
    "Saline areas",
    "Iron toxic soil and acidic soil",
    "Major irrigation in Dry Zone and Intermediate Zone",
    "Southern province",
    "High potential areas in Low Country Wet Zone",
    "Dry Zone",
    "Rainfed areas of Dry and Intermediate Zone",
    "General cultivation"
]

In [None]:
# Creating a new column 'Suitability' based on multiple recommendation criteria
combined_data['Suitability'] = combined_data.apply(
    lambda row: 1 if (any(keyword.lower() in str(row['Recommendation']).lower() for keyword in recommendation_keywords)
                      and row['Average Yield (t/ha)'] >= 5.0) else 0,
    axis=1
)

print("Column names in rice varieties dataset:", varieties_data.columns)
print("Column names in merged dataset:", combined_data.columns)

Column names in rice varieties dataset: Index(['Variety Name', 'Year of Release', 'Parentage', 'Average Yield (t/ha)',
       'Maturity (days)', 'Age Group', 'Basal Leaf Sheath Colour',
       'Recommendation', 'Brown Rice Recovery (%)', 'Milling Recovery (%)',
       'Head Rice Recovery (%)', 'Gelatinization Temperature',
       '1000 Grain Weight (g)', 'Grain Shape', 'Pericarp Colour',
       'Bushel Weight (Kg)', 'Reaction to Pest and Diseases'],
      dtype='object')
Column names in merged dataset: Index(['Variety Name', 'Year of Release', 'Parentage', 'Average Yield (t/ha)',
       'Maturity (days)', 'Age Group', 'Basal Leaf Sheath Colour',
       'Recommendation', 'Brown Rice Recovery (%)', 'Milling Recovery (%)',
       'Head Rice Recovery (%)', 'Gelatinization Temperature',
       '1000 Grain Weight (g)', 'Grain Shape', 'Pericarp Colour',
       'Bushel Weight (Kg)', 'Reaction to Pest and Diseases', 'Province',
       'District', 'Annual Temperature', 'Annual Humidity', 'Annual

In [None]:
from sklearn.preprocessing import LabelEncoder

le_temperature = LabelEncoder()
le_rainfall = LabelEncoder()

# Encoding 'AnnualTemperature' and 'Annual Rainfall' into numerical values
combined_data['Annual Temperature'] = le_temperature.fit_transform(combined_data['Annual Temperature'])
combined_data['Annual Rainfall'] = le_rainfall.fit_transform(combined_data['Annual Rainfall'])

# Define features (X) and target (y)
features = [
    'Average Yield (t/ha)', 'Maturity (days)', 'Annual Temperature', 'Annual Rainfall'
]
X = combined_data[features]
y = combined_data['Suitability']

from sklearn.model_selection import train_test_split

# Split the dataset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [None]:
from sklearn.svm import SVC

# Train the Support Vector Classifier (SVC) model
svc_model = SVC(kernel='rbf', random_state=42)
svc_model.fit(X_train, y_train)

from sklearn.metrics import classification_report, accuracy_score

# Predict and evaluate the model
y_pred = svc_model.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
report = classification_report(y_test, y_pred)

print("Accuracy (Support Vector Classifier):", accuracy)
print(report)

Accuracy (Support Vector Classifier): 0.6111111111111112
              precision    recall  f1-score   support

           0       0.89      0.23      0.37       141
           1       0.57      0.97      0.72       147

    accuracy                           0.61       288
   macro avg       0.73      0.60      0.54       288
weighted avg       0.73      0.61      0.55       288



In [None]:
from sklearn.model_selection import cross_val_score
import numpy as np

# Perform 5-fold cross-validation
cv_scores = cross_val_score(svc_model, X, y, cv=5, scoring='accuracy')
mean_cv_score = np.mean(cv_scores)
print("Mean Cross-Validation Score (Support Vector Classifier):", mean_cv_score)

Mean Cross-Validation Score (Support Vector Classifier): 0.5897914246999612


In [None]:
def predict_suitable_varieties_for_district(district_name, district_data, varieties_data, model):
    # Get the environmental conditions for the specified district
    district_conditions = district_data[district_data['District'] == district_name].iloc[0]

    # Extract the relevant features for prediction
    annual_temperature = le_temperature.transform([district_conditions['Annual Temperature']])[0]
    annual_rainfall = le_rainfall.transform([district_conditions['Annual Rainfall']])[0]

    # Create a dataframe for prediction with the relevant features of each variety combined with district conditions
    prediction_data = varieties_data.copy()
    prediction_data['Annual Temperature'] = annual_temperature
    prediction_data['Annual Rainfall'] = annual_rainfall

    # Convert maturity to numeric where applicable
    prediction_data['Maturity (days)'] = prediction_data['Maturity (days)'].apply(convert_maturity_to_numeric)
    prediction_data.dropna(subset=['Maturity (days)'], inplace=True)

    # Prepare the feature set
    X_pred = prediction_data[['Average Yield (t/ha)', 'Maturity (days)', 'Annual Temperature', 'Annual Rainfall']]

    # Predict suitability for each rice variety
    prediction_data['Suitability'] = model.predict(X_pred)

    # Filter for suitable varieties
    suitable_varieties = prediction_data[prediction_data['Suitability'] == 1]

    # Return the suitable varieties
    return suitable_varieties[['Variety Name', 'Average Yield (t/ha)', 'Maturity (days)', 'Gelatinization Temperature', 'Grain Shape']]


In [None]:
recommended_varieties_matara = predict_suitable_varieties_for_district('Matara', district_data, varieties_data, svc_model)

# Print the results
print("Recommended Rice Varieties for Matara District (Support Vector Classifier):")
print(recommended_varieties_matara)

Recommended Rice Varieties for Matara District (Support Vector Classifier):
    Variety Name  Average Yield (t/ha)  Maturity (days)  \
0            H 4                   3.5            127.5   
1            H 7                   3.6            105.0   
2            H 8                   3.7            135.0   
3            H 9                   3.0            155.0   
6       Bg 11-11                   4.5            127.5   
..           ...                   ...              ...   
93        At 306                   4.7            102.0   
96        At 307                   5.0             97.0   
97        Ld 371                   4.6            103.0   
98        Bw 372                   4.2            100.0   
101   Bg 381(IP)                   4.5            105.0   

    Gelatinization Temperature          Grain Shape  
0                         High          Long Medium  
1                 Intermediate  Intermediate Medium  
2                         High          Short Round  