In [1]:
import numpy as np
import pandas as pd
import random

In [2]:
# Read the CSV file into a pandas DataFrame
df = pd.read_csv('data_omdena_imputed.csv', delimiter=';')

print(df.head())

   ID                                   Types of Millets Common Name Category  \
0   1                  Pearl millet (Pennisetum glaucum)       PEARL    Major   
1   2                  Finger millet (Eleusine coracana)      FINGER    Major   
2   3                   Foxtail millet (Setaria italica)     FOXTAIL    Minor   
3   4  Proso millet (Panicum miliaceum) (Chena in India)       PROSO    Major   
4   5                 Little millet (Panicum sumatrense)      LITTLE    Minor   

   Drought resistant  Flood Resistant  Min Temperature (ºC)  \
0                1.0              0.0                  30.0   
1                1.0              0.0                  26.0   
2                1.0              0.0                   5.0   
3                1.0              0.0                  20.0   
4                0.0              1.0                  25.0   

   Max Temperature (ºC)  pH level of the soil Min  pH level of the soil Max  \
0                  34.0                       6.0      

In [3]:
# Get the column names of the DataFrame
column_names = df.columns.tolist()

# Print the column names
print(column_names)

['ID', 'Types of Millets', 'Common Name', 'Category', 'Drought resistant', 'Flood Resistant', 'Min Temperature (ºC)', 'Max Temperature (ºC)', 'pH level of the soil Min', 'pH level of the soil Max', 'Soil type', 'Soil Salinity (dS/m) Min', 'Soil Salinity (dS/m) Max', 'Rainfall Required (cm) Min', 'Rainfall Required (cm) Max', 'Altitude range (m) Min', 'Altitude range (m) Max', 'Soil Temperature (ºC) Min', 'Soil Temperature (ºC) Max', 'Soil moisture\nmin', 'Soil moisture\nmax', 'Light Duration (hours) Min', 'Light Duration (hours) Max', 'Land usage for each crop (t/ha) Min', 'Land usage for each crop (t/ha) Max', 'Seeding Rate (kg/ha) Min', 'Seeding Rate (kg/ha) Max', 'Maturity time (days) Min', 'Maturity time (days) Max', 'Planting Depth (cm) Min', 'Planting Depth (cm) Max', 'Planting Geometry (cm x cm)', 'Protein (g)', 'Fat (g)', 'Ash (g)', 'Crude Fibre (g)', 'Carbo- hydrates (g)', 'Energy (kcal)', 'Calcium (mg)', 'Iron (mg)', 'Thiamine (mg)', 'Ribo- flavin (mg)', 'Nia- cin (mg)', 'Pri

In [4]:
# Specify the columns to augment
columns_to_augment = ['Min Temperature (ºC)', 'Max Temperature (ºC)', 'pH level of the soil Min', 'pH level of the soil Max','Soil Salinity (dS/m) Min', 'Soil Salinity (dS/m) Max','Rainfall Required (cm) Min','Rainfall Required (cm) Max','Altitude range (m) Min', 'Altitude range (m) Max', 'Soil Temperature (ºC) Min', 'Soil Temperature (ºC) Max', 'Soil moisture\nmin', 'Soil moisture\nmax', 'Light Duration (hours) Min', 'Light Duration (hours) Max', 'Land usage for each crop (t/ha) Min', 'Land usage for each crop (t/ha) Max', 'Seeding Rate (kg/ha) Min', 'Seeding Rate (kg/ha) Max', 'Maturity time (days) Min', 'Maturity time (days) Max', 'Planting Depth (cm) Min', 'Planting Depth (cm) Max']

In [5]:
def augment_df(df, columns_to_augment, num_augmentations=1):
    augmented_rows = []
    
    for _, row in df.iterrows():
        for _ in range(num_augmentations):
            augmented_row = row.copy()
            
            for column in columns_to_augment:
                min_column = column + ' Min'
                max_column = column + ' Max'
                
                if min_column in row and max_column in row:
                    min_value = row[min_column]
                    max_value = row[max_column]
                    
                    augmented_value = random.uniform(min_value, max_value)
                    augmented_row[column] = augmented_value
                    
            augmented_rows.append(augmented_row)
    
    augmented_df = pd.DataFrame(augmented_rows)
    return augmented_df


In [6]:
num_augmentations = 10000
augmented_df = augment_df(df, columns_to_augment, num_augmentations)

In [7]:
augmented_df

Unnamed: 0,ID,Types of Millets,Common Name,Category,Drought resistant,Flood Resistant,Min Temperature (ºC),Max Temperature (ºC),pH level of the soil Min,pH level of the soil Max,...,Ash (g),Crude Fibre (g),Carbo- hydrates (g),Energy (kcal),Calcium (mg),Iron (mg),Thiamine (mg),Ribo- flavin (mg),Nia- cin (mg),Price (US$ / Kg)
0,1,Pearl millet (Pennisetum glaucum),PEARL,Major,1.0,0.0,30.0,34.0,6.0,7.0,...,1.370000,11.490000,61.780000,348.000000,27.350000,6.420000,0.250000,0.200000,0.860000,19.000000
0,1,Pearl millet (Pennisetum glaucum),PEARL,Major,1.0,0.0,30.0,34.0,6.0,7.0,...,1.370000,11.490000,61.780000,348.000000,27.350000,6.420000,0.250000,0.200000,0.860000,19.000000
0,1,Pearl millet (Pennisetum glaucum),PEARL,Major,1.0,0.0,30.0,34.0,6.0,7.0,...,1.370000,11.490000,61.780000,348.000000,27.350000,6.420000,0.250000,0.200000,0.860000,19.000000
0,1,Pearl millet (Pennisetum glaucum),PEARL,Major,1.0,0.0,30.0,34.0,6.0,7.0,...,1.370000,11.490000,61.780000,348.000000,27.350000,6.420000,0.250000,0.200000,0.860000,19.000000
0,1,Pearl millet (Pennisetum glaucum),PEARL,Major,1.0,0.0,30.0,34.0,6.0,7.0,...,1.370000,11.490000,61.780000,348.000000,27.350000,6.420000,0.250000,0.200000,0.860000,19.000000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
33,34,Italian Foxtail Millet (Setaria italica subsp....,Italian foxtail millet,Minor,0.0,1.0,25.0,35.0,6.0,7.5,...,1.874118,5.105882,72.112941,350.635294,52.604706,4.887647,0.304706,0.141176,1.572941,17.244878
33,34,Italian Foxtail Millet (Setaria italica subsp....,Italian foxtail millet,Minor,0.0,1.0,25.0,35.0,6.0,7.5,...,1.874118,5.105882,72.112941,350.635294,52.604706,4.887647,0.304706,0.141176,1.572941,17.244878
33,34,Italian Foxtail Millet (Setaria italica subsp....,Italian foxtail millet,Minor,0.0,1.0,25.0,35.0,6.0,7.5,...,1.874118,5.105882,72.112941,350.635294,52.604706,4.887647,0.304706,0.141176,1.572941,17.244878
33,34,Italian Foxtail Millet (Setaria italica subsp....,Italian foxtail millet,Minor,0.0,1.0,25.0,35.0,6.0,7.5,...,1.874118,5.105882,72.112941,350.635294,52.604706,4.887647,0.304706,0.141176,1.572941,17.244878


In [8]:
augmented_df.to_csv('augmented_data.csv', index=False)

In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import OneHotEncoder

# Step 1: Load the dataset
data = pd.read_csv('augmented_data.csv')

In [2]:
data = data.drop('Common Name', axis=1)
data = data.drop('Category', axis=1)

In [3]:
data

Unnamed: 0,Types of Millets,Drought resistant,Flood Resistant,Min Temperature (ºC),Max Temperature (ºC),pH level of the soil Min,pH level of the soil Max,Soil type,Soil Salinity (dS/m) Min,Soil Salinity (dS/m) Max,...,Ash (g),Crude Fibre (g),Carbo- hydrates (g),Energy (kcal),Calcium (mg),Iron (mg),Thiamine (mg),Ribo- flavin (mg),Nia- cin (mg),Price (US$ / Kg)
0,Pearl millet (Pennisetum glaucum),1,0,30,34,6.0,7.0,"LC, SL",11.0,12.0,...,1.370000,11.490000,61.780000,348.000000,27.350000,6.420000,0.250000,0.200000,0.860000,19.000000
1,Pearl millet (Pennisetum glaucum),1,0,30,34,6.0,7.0,"LC, SL",11.0,12.0,...,1.370000,11.490000,61.780000,348.000000,27.350000,6.420000,0.250000,0.200000,0.860000,19.000000
2,Pearl millet (Pennisetum glaucum),1,0,30,34,6.0,7.0,"LC, SL",11.0,12.0,...,1.370000,11.490000,61.780000,348.000000,27.350000,6.420000,0.250000,0.200000,0.860000,19.000000
3,Pearl millet (Pennisetum glaucum),1,0,30,34,6.0,7.0,"LC, SL",11.0,12.0,...,1.370000,11.490000,61.780000,348.000000,27.350000,6.420000,0.250000,0.200000,0.860000,19.000000
4,Pearl millet (Pennisetum glaucum),1,0,30,34,6.0,7.0,"LC, SL",11.0,12.0,...,1.370000,11.490000,61.780000,348.000000,27.350000,6.420000,0.250000,0.200000,0.860000,19.000000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
339995,Italian Foxtail Millet (Setaria italica subsp....,0,1,25,35,6.0,7.5,SLC,0.0,2.0,...,1.874118,5.105882,72.112941,350.635294,52.604706,4.887647,0.304706,0.141176,1.572941,17.244878
339996,Italian Foxtail Millet (Setaria italica subsp....,0,1,25,35,6.0,7.5,SLC,0.0,2.0,...,1.874118,5.105882,72.112941,350.635294,52.604706,4.887647,0.304706,0.141176,1.572941,17.244878
339997,Italian Foxtail Millet (Setaria italica subsp....,0,1,25,35,6.0,7.5,SLC,0.0,2.0,...,1.874118,5.105882,72.112941,350.635294,52.604706,4.887647,0.304706,0.141176,1.572941,17.244878
339998,Italian Foxtail Millet (Setaria italica subsp....,0,1,25,35,6.0,7.5,SLC,0.0,2.0,...,1.874118,5.105882,72.112941,350.635294,52.604706,4.887647,0.304706,0.141176,1.572941,17.244878


In [4]:
data.dtypes

Types of Millets                        object
Drought resistant                        int64
Flood Resistant                          int64
Min Temperature (ºC)                     int64
Max Temperature (ºC)                     int64
pH level of the soil Min               float64
pH level of the soil Max               float64
Soil type                               object
Soil Salinity (dS/m) Min               float64
Soil Salinity (dS/m) Max               float64
Rainfall Required (cm) Min               int64
Rainfall Required (cm) Max               int64
Altitude range (m) Min                   int64
Altitude range (m) Max                   int64
Soil Temperature (ºC) Min              float64
Soil Temperature (ºC) Max              float64
Soil moisture\nmin                       int64
Soil moisture\nmax                       int64
Light Duration (hours) Min             float64
Light Duration (hours) Max             float64
Land usage for each crop (t/ha) Min    float64
Land usage fo

In [5]:
# Step 2: Separate features and target
features = data.drop('Types of Millets', axis=1)
target = data['Types of Millets']

In [6]:
# Step 3: Perform one-hot encoding for categorical columns
categorical_columns = ['Soil type']

encoder = OneHotEncoder(sparse=False)
encoded_features = encoder.fit_transform(features[categorical_columns])
encoded_column_names = encoder.get_feature_names_out(categorical_columns)
encoded_features_df = pd.DataFrame(encoded_features, columns=encoded_column_names)

# Concatenate encoded features with the remaining columns
features_encoded = pd.concat([encoded_features_df, features.drop(categorical_columns, axis=1)], axis=1)

# Step 4: Split the data into training and test sets
X_train, X_test, y_train, y_test = train_test_split(features_encoded, target, test_size=0.5, random_state=42)

# Step 5: Model Training
model = RandomForestClassifier()
model.fit(X_train, y_train)

# Step 6: Model Evaluation
y_pred = model.predict(X_test)

# Step 7: Print the predicted millet types
print("Predicted Millet Types:")
print(y_pred)



Predicted Millet Types:
['Japanese Barnyard Millet (Echinochloa frumentacea)'
 'Foxtail millet (Setaria italica)' 'Guinea millet (Brachiaria deflexa)'
 ... 'Buckwheat Millet (Kuttu)' "Job's tears (Coix lacryma-jobi)"
 'Pearl millet (Pennisetum glaucum)']


In [7]:
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

# Step 7: Model Evaluation
accuracy = accuracy_score(y_test, y_pred)
precision = precision_score(y_test, y_pred, average='weighted')
recall = recall_score(y_test, y_pred, average='weighted')
f1 = f1_score(y_test, y_pred, average='weighted')

print("Accuracy:", accuracy)
print("Precision:", precision)
print("Recall:", recall)
print("F1-score:", f1)

  _warn_prf(average, modifier, msg_start, len(result))


Accuracy: 0.9704705882352941
Precision: 0.9558484217500355
Recall: 0.9704705882352941
F1-score: 0.9606910063460413


In [8]:
# Step 8: Make predictions on new data
new_data = pd.DataFrame({
    'Drought resistant': [1],
    'Flood Resistant': [0],
    'Min Temperature (ºC)': [20.88],
    'Max Temperature (ºC)': [31.61],
    'pH level of the soil Min': [5.57],
    'pH level of the soil Max': [7.15],
    'Soil type': ['L'],
    'Soil Salinity (dS/m) Min': [1.52],
    'Soil Salinity (dS/m) Max': [3.80],
    'Rainfall Required (cm) Min': [370],
    'Rainfall Required (cm) Max': [559.42],
    'Altitude range (m) Min': [96.18],
    'Altitude range (m) Max': [1947.06],
    'Soil Temperature (ºC) Min': [0],
    'Soil Temperature (ºC) Max': [31.75],
    'Soil moisture\nmin': [20.5],
    'Soil moisture\nmax': [68.08],
    'Light Duration (hours) Min': [11.81],
    'Light Duration (hours) Max': [13.90],
    'Land usage for each crop (t/ha) Min': [1.00],
    'Land usage for each crop (t/ha) Max': [2.15],
    'Seeding Rate (kg/ha) Min': [10.65],
    'Seeding Rate (kg/ha) Max': [17.84],
    'Maturity time (days) Min': [71.21],
    'Maturity time (days) Max': [96.81],
    'Planting Depth (cm) Min': [1.84],
    'Planting Depth (cm) Max': [6.47],
    'Planting Geometry 1 (cm)': [18.54],
    'Planting Geometry 2 (cm) ': [27.65],
    'Protein (g)': [10.40],
    'Fat (g)': [2.96],
    'Ash (g)': [1.87],
    'Crude Fibre (g)': [5.10],
    'Carbo- hydrates (g)': [72.12],
    'Energy (kcal)': [350.635],
    'Calcium (mg)': [52.60],
    'Iron (mg)': [4.88],
    'Thiamine (mg)': [0.30],
    'Ribo- flavin (mg)': [0.14],
    'Nia- cin (mg)': [1.57],
    'Price (US$ / Kg)': [17.24]
})

# Update the categorical_columns list with the categorical column names in new_data
categorical_columns = ['Soil type']

new_encoded_data = encoder.transform(new_data[categorical_columns])
new_encoded_data_df = pd.DataFrame(new_encoded_data, columns=encoded_column_names)

new_data_encoded = pd.concat([new_encoded_data_df, new_data.drop(categorical_columns, axis=1)], axis=1)

# Make sure the columns in new_data_encoded are in the same order as features_encoded
new_data_encoded = new_data_encoded[features_encoded.columns]

# Make predictions on the new data
new_predictions = model.predict(new_data_encoded)

print("Predicted Millet Type:")
print(new_predictions)

Predicted Millet Type:
['Guinea millet (Brachiaria deflexa)']


In [44]:
import pickle

# Save the model to a file
filename = 'millet-model.pkl'
with open(filename, 'wb') as file:
    pickle.dump(model, file)