In [None]:
import pandas as pd

# Build the merged NDVI / temperature / rainfall table and save it as CSV.

# Load CSVs
ndvi = pd.read_csv('../data/ndvi_nal_sarovar.csv', parse_dates=['date'])
temp = pd.read_csv('../data/temp_ahmedabad.csv')
rain = pd.read_csv('../data/rainfall_surendranagar.csv')

# Ensure datetime; values that cannot be parsed become NaT
temp['Date'] = pd.to_datetime(temp['Date'], errors='coerce')
rain['Date'] = pd.to_datetime(rain['Date'], errors='coerce')

# Keep only the columns that get resampled, and drop rows whose date could not
# be parsed so that NaT never ends up in the resampling index.
temp_numeric = temp[['Date', 'Temp Max.1', 'Temp Min.1']].dropna(subset=['Date'])
rain_numeric = rain[['Date', 'Avg_rainfall']].dropna(subset=['Date'])

# Resample to 16-day bins: mean temperature, total rainfall
temp_16 = temp_numeric.set_index('Date').resample('16D').mean().reset_index()
rain_16 = rain_numeric.set_index('Date').resample('16D').sum().reset_index()

# Merge datasets. Each weather table is joined on the NDVI date itself.
# Joining rainfall on the 'Date' column left behind by the temperature merge
# would silently lose rainfall for every NDVI row without a temperature match,
# because that key is missing on those rows.
# NOTE(review): an exact-date join only matches NDVI dates that coincide with a
# 16-day bin start; if the bins are not aligned with the NDVI dates, consider
# pd.merge_asof instead — confirm against the data.
df = ndvi.merge(temp_16, left_on='date', right_on='Date', how='left').drop(columns='Date')
df = df.merge(rain_16, left_on='date', right_on='Date', how='left').drop(columns='Date')

# Rename and select relevant columns
df = df[['date', 'ndvi', 'Temp Max.1', 'Temp Min.1', 'Avg_rainfall']]
df.columns = ['date', 'ndvi', 'max_temp', 'min_temp', 'rainfall_mm']

# Save merged output
df.to_csv('../data/merged_dataset.csv', index=False)
print("âœ… Merged dataset saved successfully.")


âœ… Merged dataset saved successfully.


In [13]:
import pandas as pd

# Add a binary 'bloom' label to the merged dataset and save the result.

# Load merged dataset
df = pd.read_csv('../data/merged_dataset.csv', parse_dates=['date'])

# Step 6: Add Bloom Labels (0 = no bloom, 1 = bloom)
df['bloom'] = 0

# Define bloom conditions: NDVI > 0.4, max temp > 25°C, months Oct–Nov
mask = (
    (df['ndvi'] > 0.4) &
    (df['max_temp'] > 25) &
    (df['date'].dt.month.isin([10, 11]))
)
df.loc[mask, 'bloom'] = 1

# Add known bloom events (±32 days): label every observation that falls within
# 32 days on either side of a known event date, not only the 32 days after it.
known_blooms = ['2015-10-15', '2018-11-01', '2020-10-20']
bloom_window = pd.Timedelta(days=32)
for d in known_blooms:
    d = pd.to_datetime(d)
    df.loc[df['date'].between(d - bloom_window, d + bloom_window), 'bloom'] = 1

# Save labeled dataset
df.to_csv('../data/labeled_dataset.csv', index=False)
print("âœ… Bloom labels added and saved to labeled_dataset.csv")


âœ… Bloom labels added and saved to labeled_dataset.csv


In [15]:
import pandas as pd

# Derive row-window features from the labelled dataset and save them.

# Read the labelled observations back in, parsing the date column
df = pd.read_csv('../data/labeled_dataset.csv', parse_dates=['date'])

ndvi_series = df['ndvi']
max_temp_series = df['max_temp']

# All windows below are counted in rows of the table, not in calendar days.
df = df.assign(
    # change in NDVI relative to the previous row
    ndvi_growth=ndvi_series.diff(),
    # standard deviation of NDVI over a 3-row window
    ndvi_volatility=ndvi_series.rolling(3).std(),
    # mean of max_temp over a 2-row window
    temp_avg_7d=max_temp_series.rolling(2).mean(),
    # 1 where max_temp exceeds 30, otherwise 0
    heat_wave=max_temp_series.gt(30).astype(int),
    # rainfall summed over a 2-row window
    rain_cum_30d=df['rainfall_mm'].rolling(2).sum(),
)

# Keep only rows without any missing value, then write the feature table
df = df.dropna()
df.to_csv('../data/features.csv', index=False)

print("âœ… Feature dataset saved successfully as features.csv")


âœ… Feature dataset saved successfully as features.csv


In [18]:
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report
from sklearn.impute import SimpleImputer

# Train and evaluate a logistic-regression bloom classifier on `df`.

# Features and target
feature_cols = ['ndvi', 'ndvi_growth', 'temp_avg_7d', 'rain_cum_30d']
X = df[feature_cols]
y = df['bloom']

# Split dataset first. shuffle=False keeps row order, so the last 20% of rows
# form the test fold.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, shuffle=False)

# Handle missing values. The imputer is fitted on the training fold only and
# then applied to the test fold, so test-fold statistics never leak into
# training.
imputer = SimpleImputer(strategy='mean')  # or 'median'
X_train = imputer.fit_transform(X_train)
X_test = imputer.transform(X_test)

# Train model
model = LogisticRegression(max_iter=200)
model.fit(X_train, y_train)

# Evaluate
# NOTE(review): the saved report for this cell lists only class 0 in the test
# fold, so its perfect scores say nothing about detecting blooms — confirm the
# class balance of the split before trusting the metric.
print(classification_report(y_test, model.predict(X_test)))


              precision    recall  f1-score   support

           0       1.00      1.00      1.00        69

    accuracy                           1.00        69
   macro avg       1.00      1.00      1.00        69
weighted avg       1.00      1.00      1.00        69





In [None]:
import pandas as pd


# Yearly MODIS CSV exports for India, 2017 through 2022, in chronological order.
modis_paths = [
    "../archive (2)/modis_2017_India.csv",
    "../archive (2)/modis_2018_India.csv",
    "../archive (2)/modis_2019_India.csv",
    "../archive (2)/modis_2020_India.csv",
    "../archive (2)/modis_2021_India.csv",
    "../archive (2)/modis_2022_India.csv",
]

# Read each file into its own frame; later cells refer to these names directly.
data_17, data_18, data_19, data_20, data_21, data_22 = map(pd.read_csv, modis_paths)




In [None]:
import numpy as np

# For every yearly MODIS frame, keep the rows whose coordinates are close to
# (22.7, 72.05) and collect the per-year selections in `data_comp`.
datas = [data_17,data_18,data_19,data_20,data_21,data_22]
data_comp = []
for data in datas:
    # Row positions whose latitude / longitude is approximately equal to the
    # target value (np.isclose with an absolute tolerance of 0.1).
    results_1 = np.where(np.isclose(data['latitude'], 22.7, atol=1e-1))
    # The longitude tolerance was truncated to `1e-`, which is a syntax error;
    # 1e-1 mirrors the latitude tolerance.
    results_2 = np.where(np.isclose(data['longitude'], 72.05, atol=1e-1))
    # Positions that satisfy both the latitude and the longitude condition
    results = np.intersect1d(results_1[0], results_2[0])
    data_comp.append(data.iloc[results])

In [98]:
# Stack the per-year selections into one frame (original row labels are kept)
# and show it as the cell output.
df = pd.concat(objs=data_comp)
df

Unnamed: 0,latitude,longitude,brightness,scan,track,acq_date,acq_time,satellite,instrument,confidence,version,bright_t31,frp,daynight,type
34151,22.7024,72.0426,346.2,1.1,1.0,2017-04-10,602,Terra,MODIS,89,6.2,318.4,38.7,D,0
34152,22.7008,72.0529,337.1,1.1,1.0,2017-04-10,602,Terra,MODIS,78,6.2,318.4,20.9,D,0
45877,22.709,72.0537,355.3,1.0,1.0,2017-04-19,556,Terra,MODIS,96,6.2,320.1,50.2,D,0
45878,22.7076,72.0633,335.7,1.0,1.0,2017-04-19,556,Terra,MODIS,62,6.2,317.0,12.1,D,0
50701,22.6569,72.0435,333.0,2.0,1.4,2017-05-01,621,Terra,MODIS,70,6.2,314.5,28.4,D,0
12579,22.6213,72.0306,323.8,1.5,1.2,2018-02-25,816,Aqua,MODIS,50,6.03,310.9,12.4,D,0
44376,22.667,72.0175,339.4,1.3,1.1,2018-04-01,847,Aqua,MODIS,79,6.03,322.1,19.4,D,0
48754,22.7058,72.0894,338.3,1.5,1.2,2018-04-14,817,Aqua,MODIS,74,6.03,316.7,28.8,D,0
49658,22.618,72.0687,334.3,1.3,1.1,2018-04-17,847,Aqua,MODIS,55,6.03,317.1,11.5,D,0
49659,22.6975,72.1329,342.1,1.3,1.1,2018-04-17,847,Aqua,MODIS,83,6.03,319.5,27.8,D,0


In [36]:
# Debug peek at whatever `results` currently holds in the kernel.
# NOTE(review): the saved output is a 1-tuple holding an empty index array,
# which looks like a raw np.where result from an earlier edit of the filtering
# loop — confirm, and consider removing this cell from the final notebook.
results

(array([], dtype=int64),)

In [73]:
# Rows of the 2022 frame whose longitude is approximately 72.05
# (np.isclose with atol=1e-3), regardless of latitude.
lon_hits = np.asarray(np.isclose(data_22['longitude'], 72.05, atol=1e-3))
data_22.loc[lon_hits]


Unnamed: 0,latitude,longitude,brightness,scan,track,acq_date,acq_time,satellite,instrument,confidence,version,bright_t31,frp,daynight,type
4223,26.8917,72.0486,327.0,1.1,1.1,2022-02-10,600,Terra,MODIS,59,6.03,304.2,18.2,D,0
4491,26.8948,72.0483,343.3,1.1,1.0,2022-02-11,823,Aqua,MODIS,78,6.03,311.6,37.7,D,0
51110,22.6885,72.0495,336.5,1.2,1.1,2022-05-05,847,Aqua,MODIS,52,6.03,316.9,9.2,D,0
56677,22.6988,72.0506,331.2,2.0,1.4,2022-07-04,901,Aqua,MODIS,79,6.03,297.2,36.9,D,0
63211,26.8732,72.0511,350.0,1.2,1.1,2022-11-02,824,Aqua,MODIS,88,6.03,312.5,56.9,D,0


In [74]:
# Rows of the 2022 frame whose latitude is approximately 22.7
# (np.isclose with atol=1e-4), regardless of longitude.
lat_hits = np.asarray(np.isclose(data_22['latitude'], 22.7, atol=1e-4))
data_22.loc[lat_hits]


Unnamed: 0,latitude,longitude,brightness,scan,track,acq_date,acq_time,satellite,instrument,confidence,version,bright_t31,frp,daynight,type
35516,22.7,74.8729,341.5,1.1,1.0,2022-04-10,541,Terra,MODIS,84,6.03,318.9,25.3,D,0
36165,22.6997,75.2607,303.6,1.0,1.0,2022-04-13,1707,Terra,MODIS,55,6.03,291.0,5.5,N,0
36642,22.7002,75.4485,306.8,1.2,1.1,2022-04-15,1655,Terra,MODIS,65,6.03,295.9,7.4,N,0
59917,22.6998,77.4499,321.0,1.0,1.0,2022-10-26,819,Aqua,MODIS,55,6.03,306.5,7.0,D,0
59918,22.7,77.3214,320.8,1.0,1.0,2022-10-26,819,Aqua,MODIS,60,6.03,308.6,4.8,D,0


In [103]:
import numpy as np

# Select, from every yearly MODIS frame, the rows that fall inside a
# latitude/longitude bounding box, then stack the selections into `df`.
datas = [data_17, data_18, data_19, data_20, data_21, data_22]

# Bounding box; both edges are inclusive.
lat_min, lat_max = 22.68, 22.9
lon_min, lon_max = 71.9, 72.1

data_comp = []
for data in datas:
    inside_lat = data['latitude'].between(lat_min, lat_max)
    inside_lon = data['longitude'].between(lon_min, lon_max)
    results = data.loc[inside_lat & inside_lon]
    data_comp.append(results)

df = pd.concat(data_comp)

In [107]:
import pandas as pd

# Write the current `df` to disk, leaving out the index column.
timeseries_csv = "../data/nal_sarovar_timeseries.csv"
df.to_csv(timeseries_csv, index=False)


Loading data...


FileNotFoundError: [Errno 2] No such file or directory: 'data/ndvi_nal_sarovar.csv'