In [284]:
# Importing required libraries
import pandas as pd
import geopandas as gp
import json

import warnings
warnings.filterwarnings("ignore")

pd.set_option('display.max_rows',10)  # Set to None for unlimited rows
pd.set_option('display.max_columns', None)  # Set to None for unlimited columns

In [285]:
# Writing 3 important functions

def area_acres(df_merged):
    """Add an 'area acre' column to ``df_merged`` in place.

    The new column is ``df_merged.area`` divided by 4046.8564224 and rounded
    to two decimals. Nothing is returned; the frame passed in is modified.

    NOTE(review): the divisor only yields acres if ``.area`` is in square
    metres, i.e. the frame is in a metre-based projected CRS — confirm at
    the call site.
    """
    raw_area = df_merged.area
    in_acres = raw_area / 4046.8564224
    df_merged['area acre'] = in_acres.round(2)
    
def area_hectares(df_merged):
    """Add an 'area hectare' column to ``df_merged`` in place.

    The new column is ``df_merged.area`` divided by 10000 and rounded to two
    decimals. Nothing is returned; the frame passed in is modified.

    NOTE(review): the divisor only yields hectares if ``.area`` is in square
    metres, i.e. the frame is in a metre-based projected CRS — confirm at
    the call site.
    """
    raw_area = df_merged.area
    in_hectares = raw_area / 10000
    df_merged['area hectare'] = in_hectares.round(2)

def crop_dic_creator():
    """Return the lookup table that maps a numeric class id to its display name.

    Ids 0-26 are assigned positionally from the ordered name list below; the
    remaining ids (100 and 73-88) are fixed codes for the e-survey layer and
    for the stress / health / VRA / SOM classes.
    """
    ordered_names = (
        'urban', 'sugarcane', 'cotton', 'maize', 'others', 'orchards',
        'juaar', 'rice', 'other vegetation', 'chilli', 'mustard', 'canola',
        'banana', 'tobacco', 'wheat', 'tomato', 'mountain',
        'CP-77400', 'CPF-237', 'CPF-246', 'CPF-253', 'HSF-240', 'HSF-242',
        'NSG-59', 'Other Variety', 'Ponda', 'SPF-234',
    )

    lookup = {class_id: label for class_id, label in enumerate(ordered_names)}

    lookup.update({
        100: 'esurvey',
        # Stress
        77: 'Stress',
        78: 'Potential Stress',
        79: 'No Stress',
        80: 'High Vigour',
        # Health
        73: 'Low Vegetation',
        74: 'Moderate',
        75: 'Good',
        76: 'Excellent',
        # VRA
        83: 'Low',
        84: 'Medium',
        85: 'High',
        # SOM (note the descending ids)
        88: 'Low',
        87: 'Medium',
        86: 'High',
    })

    return lookup



In [286]:
# Writing main intersect function

def intersect(bound_df,model_df,name):

    """Overlay one crop layer on one boundary layer and return per-boundary crop area.

    Args:
        bound_df: GeoDataFrame of boundaries (UC, Tehsil or District). Must
            contain the column given by ``name``.
        model_df: GeoDataFrame holding the polygons of a single class; its
            'crop id' column is looked up in the module-level
            ``crop_id_to_name_dic`` to name the output column.
        name: column of ``bound_df`` that identifies each boundary. It is the
            dissolve key and the key used to re-attach the boundary geometry.

    Returns:
        GeoDataFrame in the original CRS of ``bound_df`` with the original
        boundary geometry and a '<crop name>_area' column: the intersected
        area divided by 4046.8564224 and truncated to int. Boundaries that do
        not intersect the model layer are kept with an area of 0.

    Raises:
        KeyError: if the crop id is not in ``crop_id_to_name_dic``.
        IndexError: if ``model_df`` has no rows.
    """

    # The passbook column is not needed for the overlay
    if 'passbook' in model_df.columns:
        model_df = model_df.drop('passbook', axis=1)

    orignal_crs = bound_df.crs

    # Project BOTH layers to one and the same UTM CRS. The previous version
    # estimated a CRS per layer; whenever those two estimates differed the
    # overlay was computed on geometries in different coordinate systems.
    utm_crs = bound_df.estimate_utm_crs().to_string()
    bound_df = bound_df.to_crs(utm_crs)
    model_df = model_df.to_crs(utm_crs)

    # Positional access: works whatever labels the index of model_df carries
    crop_name = crop_id_to_name_dic[model_df['crop id'].iloc[0]]
    area_col = crop_name + '_area'

    intersection = bound_df.overlay(model_df, how="intersection")

    # One (multi)polygon per boundary
    intersection = intersection.dissolve(by=name).reset_index()

    # Area of the dissolved intersection, converted with the acre divisor
    intersection[area_col] = intersection.area / 4046.8564224

    intersection = intersection.drop(['crop id'], axis=1)

    # Boundary geometry (already in the UTM CRS) to re-attach at the end
    tb_short = bound_df[[name, 'geometry']]

    # Boundaries without any crop polygon: keep them with an area of 0.
    # .copy() so the new column is written to an independent frame rather
    # than to a filtered view of bound_df.
    no_crop_names = set(bound_df[name]).difference(intersection[name])
    tb_no_crop = bound_df[bound_df[name].isin(no_crop_names)].copy()
    tb_no_crop[area_col] = 0
    intersection = pd.concat([intersection, tb_no_crop], axis=0)

    # float -> int (truncates the fractional part)
    intersection[area_col] = intersection[area_col].astype(int)

    # Swap the intersected geometry for the full boundary geometry
    intersection = intersection.rename(columns={'geometry': 'Geo'})
    intersection = intersection.set_geometry('Geo')
    intersection = intersection.merge(tb_short, on=name, how='inner')
    intersection = intersection.set_geometry('geometry')
    intersection = intersection.drop(columns=['Geo'])

    # Back to the CRS the boundary file came in
    intersection = intersection.to_crs(orignal_crs)

    return intersection

In [287]:
# Writing the questions asking function

def questions():

    """
    Prompt for the run parameters and the model files to merge.

    Sets the module-level names user_name, season, crop and report_type, and
    appends one path per requested file (surrounding double quotes removed)
    to the module-level list "paths".
    """

    global user_name, season, crop, report_type

    def _ask(*prompt_parts):
        # Show the prompt on its own line, then read the reply
        print(*prompt_parts)
        return input()

    user_name = _ask('Enter User Name?')
    season = _ask('Enter Season ?')
    crop = _ask('Enter Crop Name ?')
    report_type = _ask('Enter Report Type?')

    file_count = int(_ask('How Many Files to be Merged?'))

    # One path per file; paths pasted from Explorer arrive wrapped in quotes
    for file_no in range(1, file_count + 1):
        paths.append(_ask('Enter Path of File No ', file_no).strip('"'))

    # NOTE(review): "area" is NOT declared global here, so this answer is
    # stored in a local and discarded; the module-level "area" read by
    # report_tileset_creation() keeps whatever value it was initialised with.
    # Before wiring this up, check the later join cell: it merges on every
    # column the frames share, which presumably would include an added
    # per-polygon area column — verify.
    area = _ask('Do you need Area in Hectares or Acres ? (Type H for Hectares or A for Acres)')

In [288]:
# Writing report_tileset_Creation function

def report_tileset_creation():
    """Read the model files in ``paths``, merge them and split the result per crop id.

    Reads the module-level ``paths`` (files to load) and ``area`` ('A' adds an
    'area acre' column, 'H' adds an 'area hectare' column, any other value
    adds no area column). The raw, per-file GeoDataFrames are stored in the
    module-level list ``dfs``; they are left untouched, so their 'predicted'
    column keeps the dtype it was read with.

    Returns:
        list of GeoDataFrames, one per distinct 'crop id' in order of first
        appearance, each in the CRS of the merged input and with a fresh
        0..n-1 index.

    Raises:
        ValueError: if ``paths`` is empty.
    """

    global dfs

    if not paths:
        raise ValueError('report_tileset_creation: "paths" is empty, nothing to merge')

    # Raw input files, kept for the statistics computed later in the notebook
    dfs = [gp.read_file(file_path) for file_path in paths]

    df_merged = pd.concat(dfs, ignore_index=True)

    # Remember the incoming CRS; areas are measured in an estimated UTM CRS
    og_crs = df_merged.crs
    df_merged = df_merged.to_crs(df_merged.estimate_utm_crs().to_string())

    # 'predicted' -> integer 'crop id'
    df_merged['predicted'] = df_merged['predicted'].astype(int)
    df_merged = df_merged.rename(columns={'predicted': 'crop id'})

    dfs_out = []

    for crop_id in df_merged['crop id'].unique():

        # .copy(): the area helpers add a column in place, so give them an
        # independent frame instead of a filtered slice of df_merged.
        crop_df = df_merged[df_merged['crop id'] == crop_id].copy()

        if area == 'A':
            area_acres(crop_df)
        elif area == 'H':
            area_hectares(crop_df)

        # Back to the original CRS, with a clean positional index
        crop_df = crop_df.to_crs(og_crs).reset_index(drop=True)

        dfs_out.append(crop_df)

    return dfs_out

In [289]:
# Writing the bound_questions function

def bound_questions():

    """
    Prompt for the boundary files.

    Sets the module-level names no_boundaries, date and survey_titles (reset
    to an empty list first), and appends one path per boundary file
    (surrounding double quotes removed) to the module-level list
    "paths_boundaries". A survey title is asked for right after each path.
    """

    global date, no_boundaries, survey_titles

    survey_titles = []

    print('Enter Number Of Boundary Files')
    no_boundaries = int(input())

    print('Enter Date for this boundary')
    date = input()

    for file_no in range(1, no_boundaries + 1):

        # Path first ...
        print('Enter Path of Boundary File No ', file_no)
        boundary_path = input().strip('"')
        paths_boundaries.append(boundary_path)

        # ... then the title that goes with it
        print('Enter Survey Title Of This Boundary')
        title = input()
        survey_titles.append(title)
        
        

In [290]:
# Writing intersect_caller function

def intersect_caller(model_df, name='Boundary Name'):

    """
    Intersect one model dataframe with every boundary file.

    Each file listed in the module-level "paths_boundaries" is read, given a
    1-based 'id' column derived from its index, and passed to intersect().

    Args:
        model_df: GeoDataFrame of a single crop, as produced by
            report_tileset_creation().
        name: boundary column used as the dissolve/merge key by intersect();
            defaults to 'Boundary Name', the value that used to be hard-coded.

    Returns:
        list with one result of intersect() per boundary file, in the order
        of "paths_boundaries".
    """

    # NOTE: the boundary files are re-read on every call, i.e. once per crop.
    intersect_dfs = []

    for boundary_path in paths_boundaries:

        bound_df = gp.read_file(boundary_path)

        # adding id to boundary file
        bound_df['id'] = bound_df.index + 1

        intersect_dfs.append(intersect(bound_df, model_df, name))

    return intersect_dfs

In [291]:
%%time
# Initialising the shared state and asking the user for the run parameters

# Module-level state shared by the functions above. These names must exist
# before questions() / bound_questions() run, because those functions append
# to the lists without creating them.
diss_column = []  # not referenced by any other code visible in this notebook
paths = []  # model file paths; filled by questions(), read by report_tileset_creation()
area = ''  # area unit flag ('A' / 'H') checked by report_tileset_creation()
output_path = ''  # not referenced by any other code visible in this notebook

no_boundaries = 0  # overwritten by bound_questions()
paths_boundaries = []  # boundary file paths; filled by bound_questions(), read by intersect_caller()


# class id -> display name; looked up by intersect() and json_creator()
crop_id_to_name_dic = crop_dic_creator()

# Interactive prompts: run parameters first, then the boundary files
questions()
bound_questions()

Enter User Name?
Faran
Enter Season ?
2023
Enter Crop Name ?
Sugarcane
Enter Report Type?
Crop Scan
How Many Files to be Merged?
2
Enter Path of File No  1
"C:\Users\FARMDAR\Desktop\Abbas\Data\Faran\17 Nov\Faran Esurvey\Faran_ESurvey_1515.shp"
Enter Path of File No  2
"C:\Users\FARMDAR\Desktop\Abbas\Data\Faran\17 Nov\Faran L1 with other crop fixed\Faran_l1_with_other_crop_fixed.shp"
Do you need Area in Hectares or Acres ? (Type H for Hectares or A for Acres)
A
Enter Number Of Boundary Files
4
Enter Date for this boundary
2023-09-11
Enter Path of Boundary File No  1
"C:\Users\FARMDAR\Desktop\Abbas\Data\Faran\Faran E Survey\Faran\Boundary\aoi.geojson"
Enter Survey Title Of This Boundary
aoi
Enter Path of Boundary File No  2
"C:\Users\FARMDAR\Desktop\Abbas\Data\Faran\Faran E Survey\Faran\Boundary\circle.geojson"
Enter Survey Title Of This Boundary
gates
Enter Path of Boundary File No  3
"C:\Users\FARMDAR\Desktop\Abbas\Data\Faran\Faran E Survey\Faran\Boundary\deh.geojson"
Enter Survey Titl

In [292]:
# Load and merge the files listed in `paths`, then split them into one
# GeoDataFrame per crop id (also fills the module-level list `dfs`).
dfs_out = report_tileset_creation()

In [293]:
# Checking the dataframes

crop_id_to_name_dic

{0: 'urban',
 1: 'sugarcane',
 2: 'cotton',
 3: 'maize',
 4: 'others',
 5: 'orchards',
 6: 'juaar',
 7: 'rice',
 8: 'other vegetation',
 9: 'chilli',
 10: 'mustard',
 11: 'canola',
 12: 'banana',
 13: 'tobacco',
 14: 'wheat',
 15: 'tomato',
 16: 'mountain',
 17: 'CP-77400',
 18: 'CPF-237',
 19: 'CPF-246',
 20: 'CPF-253',
 21: 'HSF-240',
 22: 'HSF-242',
 23: 'NSG-59',
 24: 'Other Variety',
 25: 'Ponda',
 26: 'SPF-234',
 100: 'esurvey',
 77: 'Stress',
 78: 'Potential Stress',
 79: 'No Stress',
 80: 'High Vigour',
 73: 'Low Vegetation',
 74: 'Moderate',
 75: 'Good',
 76: 'Excellent',
 83: 'Low',
 84: 'Medium',
 85: 'High',
 88: 'Low',
 87: 'Medium',
 86: 'High'}

In [294]:
%%time
# Calling the intersect caller
#ans_dfs is 2d array with first index as crop type and second index as boundary type

# One entry per crop frame; each entry is the list of per-boundary results
ans_dfs = [intersect_caller(crop_frame) for crop_frame in dfs_out]

CPU times: total: 19min 16s
Wall time: 30min 4s


In [295]:
# Joining Dataframes

# Initialize the ans list with copies of the first DataFrame in ans_dfs
# Start from copies of the first crop's per-boundary frames
ans = [first_crop_frame.copy() for first_crop_frame in ans_dfs[0]]

# Columns of the first frame are the candidate merge keys
merge_columns = list(ans[0].columns)

# Fold every further crop into the running per-boundary frame
for i in range(1, len(ans_dfs)):  # index 0 is already in ans
    for j in range(len(ans_dfs[i])):
        # Keys = candidate columns that the incoming frame also has.
        # Built as an ordered list so the key order is the same on every run.
        incoming_columns = set(ans_dfs[i][j].columns)
        common_columns = [col for col in merge_columns if col in incoming_columns]

        ans[j] = ans[j].merge(ans_dfs[i][j], on=common_columns, how='inner')

# ans now holds one merged frame per boundary file


# Removing duplicated columns left behind by the merges

for i in range(len(ans)):

    # Right-hand duplicates of columns that were not merge keys
    columns_to_drop = [col for col in ans[i].columns if col.endswith(('_y', '_z'))]
    ans[i] = ans[i].drop(columns=columns_to_drop)

    # Remove only a trailing '_x'. str.strip('_x') strips every leading and
    # trailing '_' or 'x' CHARACTER, which would turn e.g. 'index' into
    # 'inde' and 'max' into 'ma'.
    ans[i].columns = [
        col[:-2] if col.endswith('_x') else col
        for col in ans[i].columns
    ]

    # Keep the first occurrence of any column name that now repeats
    ans[i] = ans[i].loc[:, ~ans[i].columns.duplicated()]
    
    

In [329]:
ans[1].head()

Unnamed: 0,Boundary Name,id,esurvey_area,geometry,sugarcane_area,others_area,other vegetation_area,cotton_area,banana_area,orchards_area
0,Additional Dehs,8,41,"MULTIPOLYGON (((68.77632 25.08338, 68.77563 25...",2121,22597,6260,6930,7528,163
1,Newly Allocated Area,2,946,"MULTIPOLYGON (((68.60823 25.09789, 68.60967 25...",4059,22738,8412,10881,8486,2962
2,Sector Chamber,5,522,"MULTIPOLYGON (((68.53591 25.13183, 68.53364 25...",1538,9547,2581,4592,1506,374
3,Sector Chandia,3,815,"MULTIPOLYGON (((68.74170 25.17044, 68.74123 25...",935,6142,2116,1921,1772,159
4,Sector Daro Sendi,4,1,"MULTIPOLYGON (((68.75074 25.20248, 68.74845 25...",1645,20059,3151,3654,444,261


In [319]:
# Making total_stats dictionary

# Headline figures for the whole survey; every value is stored as a string.
total_stats = {}

# Finding Total Area
Total_Area = 0

# NOTE(review): Total_Area is reassigned (not accumulated) on every pass, so
# after the loop it holds the summed area of the LAST frame in `ans` only.
# Confirm that this is the boundary level the figure is meant to describe.
for df in ans:
        
        # Measure in an estimated UTM CRS, then apply the acre divisor
        estimated_utm_crs = df.estimate_utm_crs().to_string()
        df = df.to_crs(estimated_utm_crs)
        Total_Area = ((df.area / 4046.8564224).round(2)).sum()


total_stats['Total Area'] = str(Total_Area)
    
# Finding Total Growers
growers = 0 

# Distinct passbook values of a raw input file; if several files carry a
# 'passbook' column, the last one wins (the value is reassigned, not summed).
for df in dfs:
    if 'passbook' in df.columns:
        growers = str(df['passbook'].nunique())

total_stats['Total Growers'] = str(growers)


#Finding Total Esurvey

esurvey = 0

# Only computed when the first merged frame has an e-survey column
if 'esurvey_area' not in list(ans[0].columns):
    esurvey = 'N/A'
else:
    for df in dfs:
        # The raw files in `dfs` are compared against the STRING '100'
        # (the int cast in report_tileset_creation is applied to the merged
        # copy, not to these frames). Only the first row is inspected.
        if df['predicted'].iloc[0] == '100' :
            estimated_utm_crs = df.estimate_utm_crs().to_string()
            df = df.to_crs(estimated_utm_crs)
            esurvey = ((df.area / 4046.8564224).round(2)).sum().round(2)
            
total_stats['Total Esurvey'] = str(esurvey)


# Finding Total Crop Area

# String ids '0'..'16' except '1'. Rows carrying one of these ids are removed
# below, so what remains is id '1' plus any id outside 0-16.
# NOTE(review): this is hard-coded and independent of the `crop` the user
# entered — confirm it should always be id 1.
array = [str(num) for num in range(17) if num != 1]

total_crop_area = 0
for df in dfs:
    
    # Files with a single class (e.g. an e-survey layer) are skipped
    if df['predicted'].nunique() > 1:
        
        # Debug output: class ids before filtering
        print(df['predicted'].unique())
        
        df = df[~df['predicted'].isin(array)]
        
        
        # Debug output: class ids after filtering
        print(df['predicted'].unique())
        

        estimated_utm_crs = df.estimate_utm_crs().to_string()
        df = df.to_crs(estimated_utm_crs)
        total_crop_area += (df.area / 4046.8564224).round(2).sum().round(2)
        
total_stats['Total Crop Area'] = str(total_crop_area)
                
total_stats    

['1' '4' '8' '2' '12' '5']
['1']


{'Total Area': '249393.28',
 'Total Growers': '587',
 'Total Esurvey': '7754.82',
 'Total Crop Area': '15171.24'}

In [298]:
ans[0]['Boundary Name'].dtype

dtype('O')

In [299]:
list(ans[0].columns)

['Boundary Name',
 'id',
 'esurvey_area',
 'geometry',
 'sugarcane_area',
 'others_area',
 'other vegetation_area',
 'cotton_area',
 'banana_area',
 'orchards_area']

In [300]:
# GeoJSON (parsed into plain dicts) for each merged boundary frame
bound_json = [json.loads(frame.to_json()) for frame in ans]


In [301]:
# Write every merged boundary frame to '<position>.geojson' in the working directory
for position, frame in enumerate(ans):
    frame.to_file(str(position) + '.geojson', driver='GeoJSON')

In [330]:
ans[1]

Unnamed: 0,Boundary Name,id,esurvey_area,geometry,sugarcane_area,others_area,other vegetation_area,cotton_area,banana_area,orchards_area
0,Additional Dehs,8,41,"MULTIPOLYGON (((68.77632 25.08338, 68.77563 25...",2121,22597,6260,6930,7528,163
1,Newly Allocated Area,2,946,"MULTIPOLYGON (((68.60823 25.09789, 68.60967 25...",4059,22738,8412,10881,8486,2962
2,Sector Chamber,5,522,"MULTIPOLYGON (((68.53591 25.13183, 68.53364 25...",1538,9547,2581,4592,1506,374
3,Sector Chandia,3,815,"MULTIPOLYGON (((68.74170 25.17044, 68.74123 25...",935,6142,2116,1921,1772,159
4,Sector Daro Sendi,4,1,"MULTIPOLYGON (((68.75074 25.20248, 68.74845 25...",1645,20059,3151,3654,444,261
5,Sector Digh Mori,9,49,"MULTIPOLYGON (((68.65335 25.20794, 68.65266 25...",471,1097,1577,1302,1818,740
6,Sector Gate,6,2107,"MULTIPOLYGON (((68.72504 25.28414, 68.72550 25...",1954,13022,4800,8315,3357,1698
7,Sector Jhan Mori,1,1321,"MULTIPOLYGON (((68.55633 25.26064, 68.54919 25...",1047,7109,2765,4875,1909,3334
8,Sector Khokher,7,118,"MULTIPOLYGON (((68.72225 25.32052, 68.72106 25...",1397,5227,2537,3750,6798,2476


In [303]:
def json_creator(boundary_data,bound_df,survey_title,date,verbose=True):
    """Build the report structure for one boundary level.

    Reads the module-level names ``report_type``, ``crop`` and
    ``crop_id_to_name_dic``.

    Args:
        boundary_data: GeoJSON feature collection as a dict (the parsed
            ``to_json()`` of ``bound_df``); its 'features' are iterated.
        bound_df: frame whose columns decide which properties are read from
            each feature. '<name>_area' columns whose <name> is a known crop
            name become crop statistics; every other column (and
            'esurvey_area') becomes a feature property.
        survey_title: stored under 'survey_title'.
        date: key under which the statistics are filed in 'agg_stats'.
        verbose: when True (default, the previous behaviour) the finished
            structure is printed as indented JSON. Pass False to avoid
            dumping every coordinate into the notebook output.

    Returns:
        dict with 'survey_title', 'agg_stats' ({date: {report_type:
        {feature id: {Crop Name: value}}}}) and 'geometry' (one entry per
        feature holding its properties and geometry).

    Raises:
        KeyError: if a feature lacks a listed property or an 'id', or if
            ``report_type`` is 'Crop Scan' and ``crop`` is not among the crop
            statistics of a feature.
    """

    date_key = date
    crop_scan_report_type = report_type

    desired_json_object = {
        'survey_title': survey_title,
        'agg_stats': {
            date_key: {
                crop_scan_report_type: {},
            }
        },
        'geometry': []
    }
    stats_by_id = desired_json_object['agg_stats'][date_key][crop_scan_report_type]

    bound_cols = [col for col in bound_df.columns if col != 'geometry']
    known_crop_names = set(crop_id_to_name_dic.values())

    for feature in boundary_data['features']:

        crop_scan_entry = {}  # crop statistics of this feature
        prop_dic = {}         # everything else

        for col in bound_cols:
            base_name = col.replace("_area", "")
            value = feature['properties'][col]

            # 'esurvey' is a known name too, but it is reported as a
            # property ('Esurvey Area'), not as a crop statistic.
            if col != 'esurvey_area' and base_name in known_crop_names:
                crop_scan_entry[base_name.title()] = value
            else:
                label = base_name.title()
                if label == 'Id':
                    label = 'id'
                prop_dic[label] = value

        # Use the geometry type of THIS feature. Taking the type of the first
        # row for all features mislabels frames that mix Polygon and
        # MultiPolygon, because the coordinates are per feature.
        geometry = {
            'type': feature['geometry']['type'],
            'coordinates': feature['geometry']['coordinates']
        }

        stats_by_id[prop_dic['id']] = crop_scan_entry

        if report_type == 'Crop Scan':
            # Keys are title-cased above; accept the user's crop name in any
            # casing while still honouring an exact match first.
            crop_key = crop if crop in crop_scan_entry else crop.strip().title()
            prop_dic['Crop Area'] = crop_scan_entry[crop_key]
        else:
            prop_dic['Crop Area'] = sum(crop_scan_entry.values())

        # 'Esurvey' -> 'Esurvey Area', with '-' when there is no e-survey column
        prop_dic['Esurvey Area'] = prop_dic.pop('Esurvey', '-')

        # NOTE(review): each entry is tagged 'FeatureCollection' although it
        # describes a single feature; kept as-is because consumers of the
        # file may rely on it — confirm.
        desired_json_object['geometry'].append({
            'type': 'FeatureCollection',
            'properties': prop_dic,
            'geometry': geometry
        })

    if verbose:
        print(json.dumps(desired_json_object, indent=2))

    return desired_json_object




In [304]:
# One report structure per boundary level, paired by position with its
# parsed GeoJSON and its survey title
json_obj = [
    json_creator(bound_json[level], ans[level], survey_titles[level], date)
    for level in range(len(ans))
]

json_obj[1]

{
  "survey_title": "aoi",
  "agg_stats": {
    "2023-09-11": {
      "Crop Scan": {
        "1": {
          "Sugarcane": 15171,
          "Others": 107539,
          "Other Vegetation": 34202,
          "Cotton": 46224,
          "Banana": 33622,
          "Orchards": 12171
        }
      }
    }
  },
  "geometry": [
    {
      "type": "FeatureCollection",
      "properties": {
        "Boundary Name": "Faran",
        "id": 1,
        "Crop Area": 15171,
        "Esurvey Area": 5925
      },
      "geometry": {
        "type": "MultiPolygon",
        "coordinates": [
          [
            [
              [
                68.57203621290097,
                25.382275406285036
              ],
              [
                68.5688953243598,
                25.3854162948262
              ],
              [
                68.5688953243598,
                25.38570183047517
              ],
              [
                68.56832425396118,
                25.38884271901634
      

{'survey_title': 'gates',
 'agg_stats': {'2023-09-11': {'Crop Scan': {8: {'Sugarcane': 2121,
     'Others': 22597,
     'Other Vegetation': 6260,
     'Cotton': 6930,
     'Banana': 7528,
     'Orchards': 163},
    2: {'Sugarcane': 4059,
     'Others': 22738,
     'Other Vegetation': 8412,
     'Cotton': 10881,
     'Banana': 8486,
     'Orchards': 2962},
    5: {'Sugarcane': 1538,
     'Others': 9547,
     'Other Vegetation': 2581,
     'Cotton': 4592,
     'Banana': 1506,
     'Orchards': 374},
    3: {'Sugarcane': 935,
     'Others': 6142,
     'Other Vegetation': 2116,
     'Cotton': 1921,
     'Banana': 1772,
     'Orchards': 159},
    4: {'Sugarcane': 1645,
     'Others': 20059,
     'Other Vegetation': 3151,
     'Cotton': 3654,
     'Banana': 444,
     'Orchards': 261},
    9: {'Sugarcane': 471,
     'Others': 1097,
     'Other Vegetation': 1577,
     'Cotton': 1302,
     'Banana': 1818,
     'Orchards': 740},
    6: {'Sugarcane': 1954,
     'Others': 13022,
     'Other Vegetat

In [305]:
json_obj

[{'survey_title': 'aoi',
  'agg_stats': {'2023-09-11': {'Crop Scan': {1: {'Sugarcane': 15171,
      'Others': 107539,
      'Other Vegetation': 34202,
      'Cotton': 46224,
      'Banana': 33622,
      'Orchards': 12171}}}},
  'geometry': [{'type': 'FeatureCollection',
    'properties': {'Boundary Name': 'Faran',
     'id': 1,
     'Crop Area': 15171,
     'Esurvey Area': 5925},
    'geometry': {'type': 'MultiPolygon',
     'coordinates': [[[[68.57203621290097, 25.382275406285036],
        [68.5688953243598, 25.3854162948262],
        [68.5688953243598, 25.38570183047517],
        [68.56832425396118, 25.38884271901634],
        [68.5674676479137, 25.391412537158878],
        [68.57003746605625, 25.395695566497242],
        [68.5728928189485, 25.397694314241274],
        [68.57603370748967, 25.402548414877508],
        [68.57774691958474, 25.405689303418683],
        [68.58117334377488, 25.40740251461443],
        [68.58402869666708, 25.40740251461443],
        [68.60915580499648, 25.3

In [306]:
%%time
# Final Json


# Structure
# Single-element list: run metadata, headline stats and every boundary-level report
surveys = [
    dict(
        user_name=user_name,
        survey_season=season,
        crop=crop,
        total_stats=total_stats,
        survey_array=list(json_obj),
    )
]

CPU times: total: 0 ns
Wall time: 0 ns


In [307]:
# Write the survey structure to '<user_name> <report_type>.json' in the working directory
json_file_name = user_name + ' ' + report_type + '.json'

with open(json_file_name, 'w') as json_file:
    json.dump(surveys, json_file, indent=2)

print("your json file has been created.")

your json file has been created.
