In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split, cross_val_predict, cross_val_score
from sklearn.metrics import confusion_matrix, accuracy_score, fbeta_score, recall_score, f1_score
from sklearn.svm import SVC
import warnings
from tqdm import tqdm
# Suppress all warnings
warnings.filterwarnings('ignore')
# Read data
lulc = pd.read_csv(r"C:\Users\rishi\ml_projects\climate\his _data\lulc2013-2020.ascii", delim_whitespace=" ", header=None)
NDVI = pd.read_csv(r"C:\Users\rishi\ml_projects\climate\his _data\ndvi2013-2020.ascii", delim_whitespace=" ", header=None)
NTL = pd.read_csv(r"C:\Users\rishi\ml_projects\climate\his _data\light2013-2020.ascii", delim_whitespace=" ", header=None)
NLST = pd.read_csv(r"C:\Users\rishi\ml_projects\climate\his _data\nlst2013-2020winter.ascii.txt", delim_whitespace=" ", header=None)

years = ['2013', '2014', '2015', '2016', '2017', '2018', '2019', '2020']
# Set column names
features = ['LAT', 'LON', '2013', '2014', '2015', '2016', '2017', '2018', '2019', '2020']
lulc.columns = features
NDVI.columns = features
NTL.columns = features
NLST.columns = features

# Create label column
label = [0] * len(lulc)

# Assign label columns using the assign method
NDVI = NDVI.assign(**{f'LABEL{year}': label for year in features[2:]})
NTL = NTL.assign(**{f'LABEL{year}': label for year in features[2:]})
NLST = NLST.assign(**{f'LABEL{year}': label for year in features[2:]})

# Create lulc5class dataframe
lulc5class = lulc[['LAT', 'LON']].copy()
lulc5class = lulc5class.assign(**{year: label for year in features[2:]})

NDVI2013_Stats = np.array([0.006310,0.329295,0.368230,0.406575,0.428990,0.451680,0.497855,0.549734,0.676060])
NDVI2014_Stats = np.array([0.01534,0.31944,0.35232,0.39562,0.42403,0.45191,0.50575,0.57044,0.68919])
NDVI2015_Stats = np.array([0.001440,0.313796,0.339270,0.377435,0.404515,0.433180,0.487600,0.553323,0.690870])
NDVI2016_Stats = np.array([0.084000,0.298626,0.326649,0.360650,0.382765,0.406303,0.459164,0.522264,0.68354])
NDVI2017_Stats = np.array([0.016320,0.314050,0.354115,0.394945,0.423150,0.453055,0.505440,0.562915,0.692900])
NDVI2018_Stats = np.array([0.028490,0.290248,0.325766,0.366240,0.390020,0.414450,0.466064,0.539572,0.693560])
NDVI2019_Stats = np.array([0.034230,0.283785,0.322558,0.362320,0.385895,0.410930,0.464591,0.532113,0.701080])
NDVI2020_Stats = np.array([0.124510,0.317275,0.369860,0.416120,0.443110,0.470660,0.526102,0.578686,0.701330])

NTL2013_Stats = np.array([0.052080,0.117029,0.220620,0.474755,0.898780,1.922630,6.962390,15.433894,166.173920])
NTL2014_Stats = np.array([0.121830,0.189728,0.316550,0.615650,1.086410,2.250460,8.168370,18.230936,139.583240])
NTL2015_Stats = np.array([0.120000,0.226231,0.338837,0.617167,1.059640,2.221200,8.209082,18.110648,116.407750])
NTL2016_Stats = np.array([0.065810,0.145933,0.262828,0.538498,0.980750,2.151752,8.469348,19.178180,133.405980])
NTL2017_Stats = np.array([0.266930,0.339335,0.459495,0.765120,1.249190,2.520820,9.396260,20.549545,113.043390])
NTL2018_Stats = np.array([0.288410,0.383675,0.505946,0.819290,1.309870,2.593840,9.207640,19.376706,79.393230])
NTL2019_Stats = np.array([0.264740,0.361753,0.562240,0.945805,1.563625,3.146050,10.597157,21.035070,104.566140])
NTL2020_Stats = np.array([0.324180,0.436717,0.591118,0.958160,1.515770,2.978240,10.412932,20.602327,106.105190])

NLST2013_Stats = np.array([287.285110,288.048590,288.656923,289.372720,289.908410,290.732959,291.660587,292.370998,294.589492])
NLST2014_Stats = np.array([287.704293,288.619973,289.154898,289.895568,290.460287,291.238315,292.223480,292.973070,295.166597])
NLST2015_Stats = np.array([287.714853,288.427444,288.867664,289.549443,290.218842,290.890540,291.775363,292.491522,294.795607])
NLST2016_Stats = np.array([287.127640,287.982581,288.652156,289.380648,290.043414,290.859635,291.870475,292.733057,295.083667])
NLST2017_Stats = np.array([287.135442,288.091897,288.781463,289.498207,290.071140,290.849413,291.895694,292.797523,295.047878])
NLST2018_Stats = np.array([241.472825,289.383476,289.920992,290.616288,291.038900,291.675645,292.504277,293.060004,294.953475])
NLST2019_Stats = np.array([288.702422,289.265219,289.739503,290.377565,290.835513,291.485930,292.262664,292.898314,294.806772])
NLST2020_Stats = np.array([288.064313,288.709629,289.291594,290.011147,290.526907,291.117690,291.991338,293.069216,296.132568])

In [2]:
NDVI

Unnamed: 0,LAT,LON,2013,2014,2015,2016,2017,2018,2019,2020,LABEL2013,LABEL2014,LABEL2015,LABEL2016,LABEL2017,LABEL2018,LABEL2019,LABEL2020
0,17.3225,78.00750,0.35729,0.37116,0.36133,0.35694,0.37248,0.34203,0.31961,0.37996,0,0,0,0,0,0,0,0
1,17.3275,78.00750,0.38009,0.38530,0.38388,0.38077,0.39532,0.35160,0.32587,0.39876,0,0,0,0,0,0,0,0
2,17.3325,78.00750,0.40148,0.41639,0.40740,0.39464,0.41569,0.36443,0.33475,0.42572,0,0,0,0,0,0,0,0
3,17.3225,78.01250,0.35981,0.37591,0.35774,0.34819,0.37457,0.32586,0.31050,0.38240,0,0,0,0,0,0,0,0
4,17.3275,78.01250,0.37971,0.39027,0.38710,0.38317,0.39949,0.33789,0.31540,0.38711,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
26453,17.5125,79.03250,0.44256,0.39413,0.38830,0.37734,0.42831,0.38659,0.40121,0.44743,0,0,0,0,0,0,0,0
26454,17.4975,79.03751,0.41896,0.36106,0.36272,0.34624,0.39445,0.37177,0.38386,0.43411,0,0,0,0,0,0,0,0
26455,17.5025,79.03751,0.42990,0.38245,0.38196,0.36419,0.41835,0.38922,0.39890,0.45123,0,0,0,0,0,0,0,0
26456,17.5075,79.03751,0.43843,0.40499,0.39334,0.36792,0.42712,0.40560,0.41735,0.46986,0,0,0,0,0,0,0,0


In [3]:
for i in tqdm(range(26458), desc="Processing Data Points"):
    for year in tqdm(['2013', '2014', '2015', '2016', '2017', '2018', '2019', '2020'], desc="Processing Years", leave=False):
        value = lulc[year].loc[i]
        if value in [1, 2, 3, 4, 5, 6, 7, 8, 9]:
            lulc5class[year].loc[i] = 1
        elif value == 10:
            lulc5class[year].loc[i] = 2
        elif value in [11, 15, 17]:
            lulc5class[year].loc[i] = 3
            NDVI['LABEL' + year].loc[i] = 1
            NTL['LABEL' + year].loc[i] = 1
            NLST['LABEL' + year].loc[i] = 1
        elif value in [12, 14]:
            lulc5class[year].loc[i] = 4
        elif value == 13:
            lulc5class[year].loc[i] = 5
            NDVI['LABEL' + year].loc[i] = 4
            NTL['LABEL' + year].loc[i] = 4
            NLST['LABEL' + year].loc[i] = 4
        elif value == 16:
            lulc5class[year].loc[i] = 6

Processing Data Points: 100%|██████████| 26458/26458 [03:49<00:00, 115.21it/s]


In [4]:
NDVI
df=lulc5class

In [5]:
df['2020'].value_counts()


2020
4    23301
5     2550
2      566
1       22
3       15
6        4
Name: count, dtype: int64

In [15]:
for year in years:
    suffix = str(year)

    globals()[f'NDVI_{suffix}'] = NDVI[['LAT', 'LON', suffix, f'LABEL{suffix}']].copy()
    globals()[f'NTL_{suffix}'] = NTL[['LAT', 'LON', suffix, f'LABEL{suffix}']].copy()
    globals()[f'NLST_{suffix}'] = NLST[['LAT', 'LON', suffix, f'LABEL{suffix}']].copy()

    globals()[f'NDVI_{suffix}_Urban'] = globals()[f'NDVI_{suffix}'][(globals()[f'NDVI_{suffix}'][f'LABEL{suffix}'] == 4)]
    globals()[f'NTL_{suffix}_Urban'] = globals()[f'NTL_{suffix}'][(globals()[f'NTL_{suffix}'][f'LABEL{suffix}'] == 4)]
    globals()[f'NLST_{suffix}_Urban'] = globals()[f'NLST_{suffix}'][(globals()[f'NLST_{suffix}'][f'LABEL{suffix}'] == 4)]

    globals()[f'NDVI_{suffix}_Water'] = globals()[f'NDVI_{suffix}'][(globals()[f'NDVI_{suffix}'][f'LABEL{suffix}'] == 1)]
    globals()[f'NTL_{suffix}_Water'] = globals()[f'NTL_{suffix}'][(globals()[f'NTL_{suffix}'][f'LABEL{suffix}'] == 1)]
    globals()[f'NLST_{suffix}_Water'] = globals()[f'NLST_{suffix}'][(globals()[f'NLST_{suffix}'][f'LABEL{suffix}'] == 1)]

    globals()[f'NDVI_{suffix}_NotUrban'] = globals()[f'NDVI_{suffix}'][(globals()[f'NDVI_{suffix}'][f'LABEL{suffix}'] != 4)]
    globals()[f'NTL_{suffix}_NotUrban'] = globals()[f'NTL_{suffix}'][(globals()[f'NTL_{suffix}'][f'LABEL{suffix}'] != 4)]
    globals()[f'NLST_{suffix}_NotUrban'] = globals()[f'NLST_{suffix}'][(globals()[f'NLST_{suffix}'][f'LABEL{suffix}'] != 4)]

    globals()[f'NDVI_{suffix}_NotUrbanNotWater'] = globals()[f'NDVI_{suffix}_NotUrban'][(globals()[f'NDVI_{suffix}_NotUrban'][f'LABEL{suffix}'] != 1)]
    globals()[f'NTL_{suffix}_NotUrbanNotWater'] = globals()[f'NTL_{suffix}_NotUrban'][(globals()[f'NTL_{suffix}_NotUrban'][f'LABEL{suffix}'] != 1)]
    globals()[f'NLST_{suffix}_NotUrbanNotWater'] = globals()[f'NLST_{suffix}_NotUrban'][(globals()[f'NLST_{suffix}_NotUrban'][f'LABEL{suffix}'] != 1)]

for year in years:
    suffix_not_urban_not_water = f'_{year}_NotUrbanNotWater'
    suffix_urban = f'_{year}_Urban'
    suffix_water = f'_{year}_Water'
    
    globals()[f'Rule{suffix_not_urban_not_water}'] = globals()[f'NTL_{year}_NotUrbanNotWater'][['LAT', 'LON']].copy()
    globals()[f'Rule{suffix_not_urban_not_water}']['NTL'] = globals()[f'NTL_{year}_NotUrbanNotWater'][year].values
    globals()[f'Rule{suffix_not_urban_not_water}']['NLST'] = globals()[f'NLST_{year}_NotUrbanNotWater'][year].values
    globals()[f'Rule{suffix_not_urban_not_water}']['NDVI'] = globals()[f'NDVI_{year}_NotUrbanNotWater'][year].values
    globals()[f'Rule{suffix_not_urban_not_water}']['LABEL'] = globals()[f'NTL_{year}_NotUrbanNotWater'][f'LABEL{year}'].values
    
    globals()[f'Rule{suffix_urban}'] = globals()[f'NTL_{year}_Urban'][['LAT', 'LON']].copy()
    globals()[f'Rule{suffix_urban}']['NTL'] = globals()[f'NTL_{year}_Urban'][year].values
    globals()[f'Rule{suffix_urban}']['NLST'] = globals()[f'NLST_{year}_Urban'][year].values
    globals()[f'Rule{suffix_urban}']['NDVI'] = globals()[f'NDVI_{year}_Urban'][year].values
    globals()[f'Rule{suffix_urban}']['LABEL'] = globals()[f'NTL_{year}_Urban'][f'LABEL{year}'].values
    
    globals()[f'Rule{suffix_water}'] = globals()[f'NTL_{year}_Water'][['LAT', 'LON']].copy()
    globals()[f'Rule{suffix_water}']['NTL'] = globals()[f'NTL_{year}_Water'][year].values
    globals()[f'Rule{suffix_water}']['NLST'] = globals()[f'NLST_{year}_Water'][year].values
    globals()[f'Rule{suffix_water}']['NDVI'] = globals()[f'NDVI_{year}_Water'][year].values
    globals()[f'Rule{suffix_water}']['LABEL'] = globals()[f'NTL_{year}_Water'][f'LABEL{year}'].values

In [16]:
def process_data(dataframe, NLST_stats, NTL_stats, NDVI_stats):
    for i in range(len(dataframe)):
        if (dataframe['NLST'].iloc[i] >= NLST_stats[5]) and (dataframe['NTL'].iloc[i] >= NTL_stats[6]) and (NDVI_stats[2] <= dataframe['NDVI'].iloc[i] <= NDVI_stats[5]):
            dataframe['LABEL'].iloc[i] = 3
        elif (dataframe['NLST'].iloc[i] <= NLST_stats[3]) and (dataframe['NTL'].iloc[i] <= NTL_stats[3]) and ((NDVI_stats[3] <= dataframe['NDVI'].iloc[i]) or (dataframe['NDVI'].iloc[i] >= NDVI_stats[5])):
            dataframe['LABEL'].iloc[i] = 2

In [17]:
with_none_dfs = []

for year in years:
    rule_df = globals()[f'Rule_{year}_NotUrbanNotWater']
    nlst_stats = globals()[f'NLST{year}_Stats']
    ntl_stats = globals()[f'NTL{year}_Stats']
    ndvi_stats = globals()[f'NDVI{year}_Stats']

    process_data(rule_df, nlst_stats, ntl_stats, ndvi_stats)
    
    urban_df = globals()[f'Rule_{year}_Urban']
    water_df = globals()[f'Rule_{year}_Water']
    with_none_pre = pd.concat([rule_df, urban_df], ignore_index=True)
    with_none = pd.concat([with_none_pre, water_df], ignore_index=True) 
    
    model_plot = with_none[with_none['LABEL'] != 0]
    model = with_none[(with_none['LABEL'] != 0) & (with_none['LABEL'] != 4) & (with_none['LABEL'] != 1)]
    predict = with_none[with_none['LABEL'] == 0]
    
    globals()[f'Rule_{year}_WithNone_Model_Plot'] = model_plot
    globals()[f'Rule_{year}_WithNone_Model'] = model
    globals()[f'Rule_{year}_WithNone_Predict'] = predict
    
    with_none_dfs.append(model)

In [24]:
with_none[with_none['LABEL']==4]

Unnamed: 0,LAT,LON,NTL,NLST,NDVI,LABEL
23893,17.3775,78.04750,0.54966,289.089613,0.50689,4
23894,17.3825,78.04750,0.51474,289.177423,0.46852,4
23895,17.3775,78.05251,1.00000,288.990392,0.50702,4
23896,17.3325,78.06250,1.07217,289.440990,0.42814,4
23897,17.3375,78.06250,0.53075,289.308050,0.43244,4
...,...,...,...,...,...,...
26438,17.5225,78.89750,17.96671,292.880610,0.38779,4
26439,17.5325,78.89750,1.46253,292.215513,0.47712,4
26440,17.2525,78.90250,7.78561,291.791917,0.35836,4
26441,17.4625,78.92250,0.88007,291.758057,0.48538,4


In [25]:
# label_4_points=with_none[with_none['LABEL']==4][['LAT', 'LON']]
# label_4_points.to_csv(r"lat_long_label_4_points.csv",index=False)


In [26]:
rule_df['LABEL'].value_counts()

LABEL
0    22068
2     1476
3      349
Name: count, dtype: int64

In [27]:
Rule_Combined_PandR = pd.concat(with_none_dfs, ignore_index=True)
Rule_Combined_PandR_np = np.array(Rule_Combined_PandR)

features = Rule_Combined_PandR_np[:, :-1]
labels = Rule_Combined_PandR_np[:, -1]

In [28]:
Rule_Combined_PandR['LABEL'].value_counts()

LABEL
2    9781
3    3075
Name: count, dtype: int64

In [29]:
lat_lon_label_3=[]
for year in years:
    # Access the model plot for the given year
    rule_df = globals()[f'Rule_{year}_WithNone_Model_Plot']

    # Filter points with label 3 and store their LAT/LON
    label_3_points = rule_df[rule_df['LABEL'] == 2][['LAT', 'LON']]
    lat_lon_label_3.append(label_3_points)

# Concatenate all LAT/LON points into a single DataFrame
lat_lon_label_3_df = pd.concat(lat_lon_label_3, ignore_index=True)

# Optional: Save the LAT/LON points to a CSV file
lat_lon_label_3_df.to_csv("lat_lon_label_2_points.csv", index=False)