In [101]:
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.cluster import KMeans
import seaborn as sns
from sklearn.decomposition import PCA
import numpy as np
from sklearn.metrics import silhouette_score, davies_bouldin_score, calinski_harabasz_score

df = pd.read_csv("all data/citizen/kerala.csv")

df = df[df["Species_name"] == "Mango (all varieties)-Mangifera indica"]
df = df[df["Year"] == 2023]
df = df.drop(["Date_of_observation", "Observation_ID", "User_id", "User_Tree_id", "Species_id", "State_name", "Species_name", "Year"], axis=1)
df = df.dropna()
df = df.reset_index(drop=True)

weeks = df["Week"]
df

Unnamed: 0,Lat,Long,Leaves_fresh,Leaves_mature,Leaves_old,Flowers_bud,Flowers_open,Flowers_male,Flowers_Female,Fruits_unripe,Fruits_ripe,Fruits_open,Week
0,10.77145,76.48380,1.0,2.0,1.0,0.0,0.0,-2.0,-2.0,0.0,0.0,-2.0,42
1,11.86728,75.37995,2.0,2.0,1.0,1.0,0.0,-2.0,-2.0,0.0,0.0,-2.0,40
2,11.75091,75.49266,1.0,2.0,1.0,2.0,2.0,-2.0,-2.0,2.0,0.0,-2.0,42
3,10.13695,76.52394,2.0,1.0,1.0,1.0,0.0,-2.0,-2.0,0.0,0.0,-2.0,39
4,9.98534,76.77693,2.0,2.0,1.0,1.0,1.0,-2.0,-2.0,2.0,1.0,-2.0,42
...,...,...,...,...,...,...,...,...,...,...,...,...,...
6442,8.80151,76.76254,1.0,2.0,1.0,0.0,0.0,-2.0,-2.0,0.0,0.0,-2.0,22
6443,8.79411,76.75868,1.0,2.0,1.0,0.0,0.0,-2.0,-2.0,0.0,0.0,-2.0,22
6444,8.79411,76.75868,1.0,2.0,1.0,0.0,0.0,-2.0,-2.0,0.0,0.0,-2.0,21
6445,8.80151,76.76254,1.0,2.0,1.0,0.0,0.0,-2.0,-2.0,0.0,0.0,-2.0,21


In [107]:
data = {}

for week in weeks.unique():
    
    # CLUSTERING
    
    print(f"Week {week}")
    week_df = df[df["Week"] == week]
    
    week_df.drop("Week", axis=1)
    week_df = week_df.reset_index(drop=True)
    
    km = KMeans(n_clusters=1, random_state=42, n_init="auto")
    
    clusters = km.fit_predict(week_df)


    centroids = km.cluster_centers_

    distances = []

    for i, row in week_df.iterrows():
        cluster_label = clusters[i]
        centroid = centroids[0]
        relevant_values = row.values[:len(row.values)]
        float_centroid = [float(val) for val in centroid[:len(centroid)]]
        distance = np.linalg.norm(relevant_values[:len(relevant_values)] - float_centroid)
        distances.append(distance)
    
    #score = silhouette_score(week_df, clusters, metric='euclidean')
    #sil_score.append(score)
        
    #print("Silhouette score: ", score)
        
    # FILTERING
    
    week_df['Distance_to_Centroid'] = distances
    
    std_dev_dist = week_df['Distance_to_Centroid'].std()
    mean = np.mean(week_df["Distance_to_Centroid"])
    
    valid_data = week_df[(week_df['Distance_to_Centroid'] >= mean - 1*std_dev_dist) & (week_df['Distance_to_Centroid'] <= mean + 1*std_dev_dist)]
    
    valid_data = valid_data.reset_index(drop=True)
    clusters2 = km.fit_predict(valid_data)
    
    d = []
    
    centroids = km.cluster_centers_

    for i, row in valid_data.iterrows():
        cluster_label = clusters2[i]
        centroid = centroids[0]
        relevant_values = row.values
        float_centroid = [float(val) for val in centroid]
        distance = np.linalg.norm(relevant_values - float_centroid)
        d.append(distance)
    
    valid_data['Distance_to_Centroid'] = d
    closest_point = valid_data[ valid_data["Distance_to_Centroid"] == min(valid_data['Distance_to_Centroid']) ]
    print(f"Week {week} reference: {closest_point.iloc[0]}")
    
    data[week] = closest_point
    

Week 42
Week 42 reference: Lat                      9.397710
Long                    76.578530
Leaves_fresh             1.000000
Leaves_mature            2.000000
Leaves_old               1.000000
Flowers_bud              0.000000
Flowers_open             0.000000
Flowers_male            -2.000000
Flowers_Female          -2.000000
Fruits_unripe            0.000000
Fruits_ripe              0.000000
Fruits_open             -2.000000
Week                    42.000000
Distance_to_Centroid     1.230258
Name: 40, dtype: float64
Week 40
Week 40 reference: Lat                      9.397710
Long                    76.578530
Leaves_fresh             1.000000
Leaves_mature            2.000000
Leaves_old               1.000000
Flowers_bud              0.000000
Flowers_open             0.000000
Flowers_male            -2.000000
Flowers_Female          -2.000000
Fruits_unripe            0.000000
Fruits_ripe              0.000000
Fruits_open             -2.000000
Week                    40.000000
Dis

Week 23 reference: Lat                      9.397710
Long                    76.578530
Leaves_fresh             1.000000
Leaves_mature            2.000000
Leaves_old               1.000000
Flowers_bud              0.000000
Flowers_open             0.000000
Flowers_male            -2.000000
Flowers_Female          -2.000000
Fruits_unripe            0.000000
Fruits_ripe              0.000000
Fruits_open             -2.000000
Week                    23.000000
Distance_to_Centroid     1.197668
Name: 9, dtype: float64
Week 11
Week 11 reference: Lat                     10.786730
Long                    76.654790
Leaves_fresh             1.000000
Leaves_mature            2.000000
Leaves_old               1.000000
Flowers_bud              1.000000
Flowers_open             1.000000
Flowers_male            -2.000000
Flowers_Female          -2.000000
Fruits_unripe            2.000000
Fruits_ripe              1.000000
Fruits_open             -2.000000
Week                    11.000000
Distance_to_

Week 9 reference: Lat                     10.818530
Long                    76.672830
Leaves_fresh             1.000000
Leaves_mature            2.000000
Leaves_old               1.000000
Flowers_bud              1.000000
Flowers_open             1.000000
Flowers_male            -2.000000
Flowers_Female          -2.000000
Fruits_unripe            1.000000
Fruits_ripe              0.000000
Fruits_open             -2.000000
Week                     9.000000
Distance_to_Centroid     1.412121
Name: 16, dtype: float64
Week 8
Week 8 reference: Lat                     10.025260
Long                    76.366000
Leaves_fresh             1.000000
Leaves_mature            2.000000
Leaves_old               1.000000
Flowers_bud              1.000000
Flowers_open             1.000000
Flowers_male            -2.000000
Flowers_Female          -2.000000
Fruits_unripe            1.000000
Fruits_ripe              0.000000
Fruits_open             -2.000000
Week                     8.000000
Distance_to_Ce

In [103]:
time = np.arange(0, 49)
flowers_bud = []

for key, value in data.items():
    print(f"{key}: {value}")

#for i in range(49):
#    flowers_bud.append(data[i])
#    
#    print(f"Observations in week {i}: ", len(df[df["Week"] == i]))
#
#plt.plot(time, flowers_bud)
#plt.title("Flowers bud, 2023")
#plt.show()

42:         Lat      Long  Leaves_fresh  Leaves_mature  Leaves_old  Flowers_bud  \
40  9.39771  76.57853           1.0            2.0         1.0          0.0   
52  9.39771  76.57853           1.0            2.0         1.0          0.0   
53  9.39771  76.57853           1.0            2.0         1.0          0.0   
55  9.39771  76.57853           1.0            2.0         1.0          0.0   
90  9.39771  76.57853           1.0            2.0         1.0          0.0   
91  9.39771  76.57853           1.0            2.0         1.0          0.0   
93  9.39771  76.57853           1.0            2.0         1.0          0.0   
94  9.39771  76.57853           1.0            2.0         1.0          0.0   

    Flowers_open  Flowers_male  Flowers_Female  Fruits_unripe  Fruits_ripe  \
40           0.0          -2.0            -2.0            0.0          0.0   
52           0.0          -2.0            -2.0            0.0          0.0   
53           0.0          -2.0            -2.0    

28:          Lat      Long  Leaves_fresh  Leaves_mature  Leaves_old  Flowers_bud  \
100  9.39771  76.57853           1.0            2.0         1.0          0.0   

     Flowers_open  Flowers_male  Flowers_Female  Fruits_unripe  Fruits_ripe  \
100           0.0          -2.0            -2.0            0.0          0.0   

     Fruits_open  Week  Distance_to_Centroid  
100         -2.0    28              1.032988  
29:          Lat      Long  Leaves_fresh  Leaves_mature  Leaves_old  Flowers_bud  \
142  9.98358  76.78574           1.0            2.0         1.0          0.0   

     Flowers_open  Flowers_male  Flowers_Female  Fruits_unripe  Fruits_ripe  \
142           0.0          -2.0            -2.0            0.0          0.0   

     Fruits_open  Week  Distance_to_Centroid  
142         -2.0    29              1.219131  
21:         Lat      Long  Leaves_fresh  Leaves_mature  Leaves_old  Flowers_bud  \
9   9.39771  76.57853           1.0            2.0         1.0          0.0   
20

13:         Lat      Long  Leaves_fresh  Leaves_mature  Leaves_old  Flowers_bud  \
57  10.5717  76.48901           1.0            2.0         1.0          1.0   
60  10.5717  76.48901           1.0            2.0         1.0          1.0   

    Flowers_open  Flowers_male  Flowers_Female  Fruits_unripe  Fruits_ripe  \
57           1.0          -2.0            -2.0            2.0          1.0   
60           1.0          -2.0            -2.0            2.0          1.0   

    Fruits_open  Week  Distance_to_Centroid  
57         -2.0    13              1.248294  
60         -2.0    13              1.248294  
12:           Lat      Long  Leaves_fresh  Leaves_mature  Leaves_old  Flowers_bud  \
132  10.18851  76.53914           1.0            2.0         1.0          1.0   

     Flowers_open  Flowers_male  Flowers_Female  Fruits_unripe  Fruits_ripe  \
132           1.0          -2.0            -2.0            1.0          1.0   

     Fruits_open  Week  Distance_to_Centroid  
132         