In [132]:
import pandas as pd
import numpy as np

# Load your dataset
data = pd.read_csv('dataset.csv')

# List the columns you want to keep
columns_to_keep = [
    'Id', 'Name', 'Temperat. requir. optimal min', 'Temperat. requir. optimal max',
    'Temperat. requir. absolute min', 'Temperat. requir. absolute max', 
    'Rainfall (annual) optimal min', 'Rainfall (annual) optimal max', 
    'Rainfall (annual) absolute min', 'Rainfall (annual) absolute max', 
    'Altitude optimal min', 'Altitude optimal max', 'Altitude absolute min', 
    'Altitude absolute max', 'Light intensity optimal min', 'Light intensity optimal max', 
    'Light intensity absolute min', 'Light intensity absolute max'
]

absolute_columns = [
    'Temperat. requir. absolute min', 'Temperat. requir. absolute max', 
    'Rainfall (annual) absolute min', 'Rainfall (annual) absolute max', 
    'Light intensity absolute min', 'Light intensity absolute max'
]

# Drop columns that are not in the 'columns_to_keep' list
data = data[columns_to_keep]
print(len(data))  # Check the number of columns in the cleaned dataset

# Replace placeholders with NaN
data.replace(['-', '--', '---', 'NaN'], np.nan, inplace=True)

# Drop rows where any of the absolute temperature columns have missing values
data_cleaned = data.dropna(subset=absolute_columns)

# Check the result
print(data_cleaned)  # Display the cleaned dataset to confirm
print(data_cleaned.isnull().sum())  # Verify if any columns still have missing values
print(len(data_cleaned))  # Check the number of columns in


2562
        Id                                      Name  \
0     1007                           Erythrina fusca   
3     5479                           Diplachne fusca   
4     4912                      Cotylelobium burckii   
5     4913                  Cotylelobium lanceolatum   
6     4914                  Cotylelobium melanoxylon   
...    ...                                       ...   
2557  1406                          Malpighia glabra   
2558  6212                             Ficus vogelii   
2559  2192                        Anethum graveolens   
2560   618                           Capsicum annuum   
2561  4524  Chrysanthemum coronarium var. coronarium   

     Temperat. requir. optimal min Temperat. requir. optimal max  \
0                               16                            24   
3                               22                            28   
4                               22                            34   
5                               19                

  data.replace(['-', '--', '---', 'NaN'], np.nan, inplace=True)


In [133]:
#NORMALIZE DATA
data_cleaned['Temperat. requir. absolute min'] = data_cleaned['Temperat. requir. absolute min'].astype(int)
data_cleaned['Temperat. requir. absolute max'] = data_cleaned['Temperat. requir. absolute max'].astype(int)
data_cleaned['Rainfall (annual) absolute min'] = data_cleaned['Rainfall (annual) absolute min'].astype(int)
data_cleaned['Rainfall (annual) absolute max'] = data_cleaned['Rainfall (annual) absolute max'].astype(int)
data_cleaned['Temperat. requir. optimal max'] = data_cleaned['Temperat. requir. optimal max'].astype(int)
data_cleaned['Temperat. requir. optimal min'] = data_cleaned['Temperat. requir. optimal min'].astype(int)
data_cleaned['Rainfall (annual) optimal max'] = data_cleaned['Rainfall (annual) optimal max'].astype(int)
data_cleaned['Rainfall (annual) optimal min'] = data_cleaned['Rainfall (annual) optimal min'].astype(int)

data_cleaned['avg_temp'] = (data_cleaned['Temperat. requir. absolute min'] +
                            data_cleaned['Temperat. requir. absolute max']) / 2
data_cleaned['avg_rainfall'] = (data_cleaned['Rainfall (annual) absolute min'] +
                                data_cleaned['Rainfall (annual) absolute max']) / 2

light_intensity_mapping = {
    'clear skies': 5,
    'very bright': 4,
    'light shade': 3,
    'cloudy skies': 2,
    'heavy shade': 1
}

data_cleaned['Light intensity absolute max'] = data_cleaned['Light intensity absolute max'].map(light_intensity_mapping)
data_cleaned['Light intensity absolute min'] = data_cleaned['Light intensity absolute min'].map(light_intensity_mapping)
data_cleaned['Light intensity optimal max'] = data_cleaned['Light intensity optimal max'].map(light_intensity_mapping)
data_cleaned['Light intensity optimal min'] = data_cleaned['Light intensity optimal min'].map(light_intensity_mapping)

data_cleaned['avg_light_intensity'] = (data_cleaned['Light intensity absolute min'] +
                                        data_cleaned['Light intensity absolute max']) / 2

data_cleaned.to_csv('numerical_dataset.csv', index=False)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data_cleaned['Temperat. requir. absolute min'] = data_cleaned['Temperat. requir. absolute min'].astype(int)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data_cleaned['Temperat. requir. absolute max'] = data_cleaned['Temperat. requir. absolute max'].astype(int)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versu

In [134]:
# DROP THE COLUMNS THAT ARE NOT NEEDED TO TRAIN THE MODEL
data_model = data_cleaned.drop(columns=['Temperat. requir. optimal min', 'Temperat. requir. optimal max',
                                        'Temperat. requir. absolute min', 'Temperat. requir. absolute max',
                                        'Rainfall (annual) optimal min', 'Rainfall (annual) optimal max',
                                        'Rainfall (annual) absolute min', 'Rainfall (annual) absolute max',
                                        'Altitude optimal min', 'Altitude optimal max',
                                         'Altitude absolute min', 'Altitude absolute max',
                                          'Light intensity optimal min',
                                        'Light intensity optimal max', 'Light intensity absolute min', 'Light intensity absolute max'])
data_numerical = data_model
data_numerical.to_csv('numerical_dataset.csv', index=False)

In [152]:
import tensorflow as tf
import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler

df = pd.read_csv('numerical_dataset.csv')

features = df[['avg_temp', 'avg_rainfall', 'avg_light_intensity']].values

scaler = MinMaxScaler()
features_scaled = scaler.fit_transform(features)

feature_weights = np.array([1.5, 1.0, 0.5])

class RecommendationModel(tf.keras.Model):
    def __init__(self, plant_features, scaler, feature_weights):
        super(RecommendationModel, self).__init__()
        self.plant_features = plant_features
        self.scaler = scaler
        self.feature_weights = feature_weights

    def call(self, user_input):
        # Normalize the user input
        user_input_scaled = self.scaler.transform([user_input])
        weighted_user_input = user_input_scaled * self.feature_weights
        user_input_tensor = tf.convert_to_tensor(weighted_user_input, dtype=tf.float32)
        
        weighted_plant_features = self.plant_features * self.feature_weights
        
        distances = tf.norm(weighted_plant_features - user_input_tensor, axis=1)
        return distances

plant_features_tensor = tf.convert_to_tensor(features_scaled, dtype=tf.float32)
model = RecommendationModel(plant_features_tensor, scaler, feature_weights)

def recommend_plants(user_input, top_k=20):
    user_input_list = [user_input['temp'], user_input['rainfall'], user_input['light_intensity']]
    user_input_tensor = tf.convert_to_tensor(user_input_list, dtype=tf.float32)  # Convert input to tensor
    distances = model(user_input_tensor)
    
    _, indices = tf.math.top_k(-distances, k=top_k)  # Negative for ascending order
    
    recommended_plants = df.iloc[indices.numpy()]
    
    return recommended_plants[['Id', 'Name', 'avg_temp', 'avg_rainfall', 'avg_light_intensity']]

user_input = {
    'temp': 17.5,
    'rainfall': 1400.0,
    'light_intensity': 3.0
}

recommended_plants = recommend_plants(user_input)

print("Recommended Plants:")
print(recommended_plants)


Recommended Plants:
         Id                            Name  avg_temp  avg_rainfall  \
1644    693               Cichorium endivia      17.5        1400.0   
1504   6631            Hedysarum coronarium      19.0        1400.0   
1698    970          Echinochloa crus-galli      21.0        1405.0   
411   10187                Syncarpia hillii      22.5        1400.0   
9       289          Abelmoschus esculentus      23.5        1400.0   
1592   2968                   Albizia toona      24.5        1400.0   
423    1721                   Pisum sativum      17.0        1425.0   
1573   5894               Eucalyptus nitens      17.0        1375.0   
292    2509              Chenopodium quinoa      18.5        1425.0   
1132   7209                  Lens culinaris      18.5        1375.0   
2031   2146                      Vicia faba      18.5        1425.0   
1629   9097                    Quercus alba      19.5        1375.0   
352    8646  Pinus oocarpa var. ochoterenai      21.0    

In [154]:
model.save("model.h5")

