In [560]:
# Importing required libraries
import pandas as pd
import geopandas as gp
import json


import warnings
warnings.filterwarnings("ignore")

pd.set_option('display.max_rows',10)  # Set to None for unlimited rows
pd.set_option('display.max_columns', None)  # Set to None for unlimited columns

In [561]:
# Helper functions: area in acres, area in hectares, and the crop-id -> crop-name lookup

def area_acres(df_merged):
    """Add an 'area acre' column to ``df_merged`` in place.

    Every value of ``df_merged.area`` is divided by 4046.8564224 (square
    metres per acre) and rounded to two decimals. The frame passed in is
    modified and nothing is returned.

    NOTE(review): the conversion presumes ``.area`` is expressed in square
    metres (i.e. the frame is in a metric projected CRS) -- confirm at the
    call sites.
    """
    sq_metres_per_acre = 4046.8564224
    acres = df_merged.area / sq_metres_per_acre
    df_merged['area acre'] = acres.round(2)
    
def area_hectares(df_merged):
    """Add an 'area hectare' column to ``df_merged`` in place.

    Every value of ``df_merged.area`` is divided by 10000 (square metres per
    hectare) and rounded to two decimals. The frame passed in is modified and
    nothing is returned.

    NOTE(review): the conversion presumes ``.area`` is expressed in square
    metres (i.e. the frame is in a metric projected CRS) -- confirm at the
    call sites.
    """
    sq_metres_per_hectare = 10000
    hectares = df_merged.area / sq_metres_per_hectare
    df_merged['area hectare'] = hectares.round(2)

def crop_dic_creator():
    """Build the lookup dictionary that maps a numeric class id to its label.

    Ids 0..26 are assigned to the crop / variety names in list order; the
    remaining ids are fixed codes for the other report types (esurvey, stress,
    health, VRA, SOM, harvest monitoring, silage/grain and sowing month).

    Note that the VRA (83-85) and SOM (86-88) groups share the labels
    'Low' / 'Medium' / 'High', so labels are not unique across the dictionary.

    Returns:
        dict: ``{class id (int): label (str)}``.
    """
    base_names = [
        'urban', 'sugarcane', 'cotton', 'maize', 'others', 'orchards', 'juaar',
        'rice', 'other vegetation', 'chilli', 'mustard', 'canola', 'banana',
        'tobacco', 'wheat', 'tomato', 'mountain',
        'CP-77400',
        'CPF-237',
        'CPF-246',
        'CPF-253',
        'HSF-240',
        'HSF-242',
        'NSG-59',
        'Other Variety',
        'Ponda',
        'SPF-234',
    ]

    # Ids 0..len(base_names)-1 follow the order of the list above
    lookup = dict(enumerate(base_names))

    # Fixed codes, added group by group (insertion order is kept on purpose)
    fixed_groups = (
        {100: 'esurvey'},
        # Stress
        {77: 'Stress', 78: 'Potential Stress', 79: 'No Stress', 80: 'High Vigour'},
        # Health
        {73: 'Low Vegetation', 74: 'Moderate', 75: 'Good', 76: 'Excellent'},
        # VRA
        {83: 'Low', 84: 'Medium', 85: 'High'},
        # SOM
        {88: 'Low', 87: 'Medium', 86: 'High'},
        # Harvest monitoring
        {81: 'Remaining Sugarcane', 82: 'Harvested'},
        # Silage or Grain
        {101: 'Grain', 102: 'Silage'},
    )
    for group in fixed_groups:
        lookup.update(group)

    # Sowing: ids 61..72 are the calendar months January..December
    month_names = [
        'January', 'February', 'March', 'April',
        'May', 'June', 'July', 'August',
        'September', 'October', 'November', 'December',
    ]
    lookup.update(zip(range(61, 73), month_names))

    return lookup



In [562]:
# Writing main intersect function

def intersect(bound_df,model_df,name):
    """Intersect one crop layer with a boundary layer and return per-boundary stats.

    Args:
        bound_df: GeoDataFrame of boundaries (UC, Tehsil, District, ...). It
            must contain the column ``name`` and a geometry column called
            'geometry'.
        model_df: GeoDataFrame holding the polygons of a single class. The
            'crop id' of its first row is looked up in the module-level
            ``crop_id_to_name_dic`` to name the output column. A 'passbook'
            column, if present, is dropped before intersecting.
        name: Name of the ``bound_df`` column that identifies a boundary; the
            intersection is dissolved on this column.

    Returns:
        GeoDataFrame in the original CRS of ``bound_df`` with one row per
        boundary, carrying the full boundary geometry and an integer column
        '<crop name>_area' (intersected area divided by 4046.8564224, i.e.
        acres when the working CRS is in metres; 0 for boundaries the crop
        layer does not touch).
    """

    # Removing passbook before intersecting
    if 'passbook' in model_df.columns:
        model_df = model_df.drop('passbook',axis = 1)

    orignal_crs = bound_df.crs

    # Both layers are moved to ONE projected CRS. Estimating a UTM zone for
    # each layer separately can yield two different CRSs, and overlaying
    # layers with mismatched CRSs gives meaningless intersections.
    working_crs = bound_df.estimate_utm_crs().to_string()
    bound_df = bound_df.to_crs(working_crs)
    model_df = model_df.to_crs(working_crs)

    # Positional access: the first row is used whatever the index labels are
    crop_name = crop_id_to_name_dic[model_df['crop id'].iloc[0]]
    area_col = crop_name + '_area'

    intersection = bound_df.overlay(model_df, how = "intersection")

    # Dissolving intersection polygons that belong to the same boundary
    intersection = intersection.dissolve(by = name).reset_index()

    # Area of the dissolved intersection, converted with the acre constant
    intersection[area_col] = intersection.area / 4046.8564224

    # dropping crop id
    intersection = intersection.drop(['crop id'], axis = 1)

    # Boundary geometry, stored to replace the clipped geometry further down
    tb_short = bound_df[[name,'geometry']]

    # Boundaries with no intersection at all are appended with an area of 0.
    # .copy() so the assignment below targets an independent frame.
    no_crop_UC = set(bound_df[name]).difference(intersection[name])
    tb_no_crop = bound_df[bound_df[name].isin(no_crop_UC)].copy()
    tb_no_crop[area_col] = 0
    intersection = pd.concat([intersection,tb_no_crop],axis = 0)

    # Changing datatype from float to int (truncates the fractional part)
    intersection[area_col] = intersection[area_col].astype(int)

    # Swap the clipped geometry ('Geo') for the full boundary geometry
    intersection = intersection.rename(columns = {'geometry' : 'Geo'})
    intersection = intersection.set_geometry('Geo')
    intersection = intersection.merge(tb_short, on= name, how='inner')
    intersection = intersection.set_geometry('geometry')
    intersection = intersection.drop(columns = ['Geo'])

    # Converting back to the original CRS
    intersection = intersection.to_crs(orignal_crs)

    return intersection

In [563]:
# Writing the questions asking function

def questions():
    """Prompt for the run metadata and the paths of the files to merge.

    The answers for user name, season, crop name and report type are stored
    in the module-level variables ``user_name``, ``season``, ``crop`` and
    ``report_type``. Each entered file path has surrounding double quotes
    stripped and is appended to the module-level list ``paths``.
    """
    global user_name, season, crop, report_type

    print('Enter User Name?')
    user_name = input()

    print('Enter Season ?')
    season = input()

    print('Enter Crop Name ?')
    crop = input()

    print('Enter Report Type?')
    report_type = input()

    print('How Many Files to be Merged?')
    files = int(input())

    # One prompt per file; paths pasted with "quotes" are cleaned up
    for file_index in range(files):
        print('Enter Path of File No ', file_index + 1)
        paths.append(input().strip('"'))

    # Asking the unit of area.
    # NOTE(review): `area` is not declared `global` in this function, so the
    # assignment below binds a local name only and the module-level `area`
    # is left unchanged -- confirm whether that is intended.
    print('Do you need Area in Hectares or Acres ? (Type H for Hectares or A for Acres)')
    area = input()

In [564]:
# Writing report_tileset_Creation function

def report_tileset_creation():
    """Read the model files, merge them and split the result per crop id.

    Reads every path in the module-level list ``paths`` into the module-level
    list ``dfs``, concatenates the frames, casts the 'predicted' column to int
    and renames it to 'crop id'. The merged frame is then split into one frame
    per unique crop id. Depending on the module-level ``area`` setting
    ('A' or 'H') an acre or hectare column is added while the data is in the
    estimated UTM CRS; any other value adds no area column.

    Returns:
        list: one GeoDataFrame per unique crop id, in the original CRS of the
        merged data and with a fresh 0-based index.
    """
    global dfs
    dfs = []
    dfs.extend(gp.read_file(file_path) for file_path in paths)

    # One frame holding the rows of every input file
    merged = pd.concat(dfs, ignore_index=True)

    # Remember the source CRS, then work in the estimated UTM CRS
    source_crs = merged.crs
    utm_crs = merged.estimate_utm_crs().to_string()
    merged = merged.to_crs(utm_crs)

    # 'predicted' becomes an integer column named 'crop id'
    merged['predicted'] = merged['predicted'].astype(int)
    merged = merged.rename(columns = {'predicted': 'crop id'})

    # One sub-frame per unique crop id, in order of first appearance
    crop_ids = merged['crop id'].unique()
    per_crop = [merged[merged['crop id'] == crop_id] for crop_id in crop_ids]

    for position, crop_frame in enumerate(per_crop):

        # Area column is added before leaving the UTM CRS
        if area == 'A':
            area_acres(crop_frame)
        elif area == 'H':
            area_hectares(crop_frame)

        # Back to the source CRS, with the index restarted at 0
        per_crop[position] = crop_frame.to_crs(source_crs).reset_index(drop=True)

    return per_crop

In [565]:
# Writing the bound_questions function

def bound_questions():
    """Prompt for the boundary files, their survey titles and the report date.

    Stores the number of boundary files, the date and the list of survey
    titles in the module-level variables ``no_boundaries``, ``date`` and
    ``survey_titles`` (the latter is reset to an empty list first). Each
    boundary path has surrounding double quotes stripped and is appended to
    the module-level list ``paths_boundaries``.
    """
    global date, no_boundaries, survey_titles

    survey_titles = []

    print('Enter Number Of Boundary Files')
    no_boundaries = int(input())

    print('Enter Date for this boundary')
    date = input()

    # For every boundary file: first its path, then its survey title
    for boundary_no in range(1, no_boundaries + 1):
        print('Enter Path of Boundary File No ', boundary_no)
        paths_boundaries.append(input().strip('"'))

        print('Enter Survey Title Of This Boundary')
        survey_titles.append(input())
        
        

In [566]:
# Writing intersect_caller function

def intersect_caller(model_df):
    """Intersect one model frame with every boundary file.

    Every path in the module-level list ``paths_boundaries`` is read, given a
    1-based 'id' column derived from its index, and intersected with
    ``model_df`` through ``intersect`` using 'Boundary Name' as the boundary
    identifier column.

    Args:
        model_df: GeoDataFrame holding the polygons of a single class.

    Returns:
        list: one result of ``intersect`` per boundary file, in the order of
        ``paths_boundaries``.
    """
    intersect_dfs = []

    for boundary_path in paths_boundaries:
        bound_df = gp.read_file(boundary_path)

        # adding id to boundary file (index + 1)
        bound_df['id'] = bound_df.index + 1

        intersect_dfs.append(intersect(bound_df, model_df, 'Boundary Name'))

    return intersect_dfs

In [567]:
%%time
# Initialise the module-level state used by the helper functions, then ask the user
# for the run parameters. Re-running this cell resets the lists below.

# `paths` is filled by questions() and `paths_boundaries` by bound_questions();
# `area` is the setting read by report_tileset_creation().
diss_column = []
paths = []
area = ''
output_path = ''

no_boundaries = 0
paths_boundaries = []


# Lookup table (class id -> label) read by intersect() and json_creator()
crop_id_to_name_dic = crop_dic_creator()

# Interactive prompts: run metadata + model files, then boundary files
questions()
bound_questions()

Enter User Name?
ASML
Enter Season ?
2023
Enter Crop Name ?
Sugarcane
Enter Report Type?
Plant Health
How Many Files to be Merged?
2
Enter Path of File No  1
"C:\Users\FARMDAR\Desktop\Abbas\Data\Asml\Plant Health 28 Nov\Esurvey\esurvey.shp"
Enter Path of File No  2
"C:\Users\FARMDAR\Desktop\Abbas\Data\Asml\Plant Health 28 Nov\Model\ASML_PlantHealth.shp"
Do you need Area in Hectares or Acres ? (Type H for Hectares or A for Acres)
A
Enter Number Of Boundary Files
3
Enter Date for this boundary
2023-11-11
Enter Path of Boundary File No  1
"C:\Users\FARMDAR\Desktop\Abbas\Data\Asml\Data For Script Running\aoi.geojson"
Enter Survey Title Of This Boundary
aoi
Enter Path of Boundary File No  2
"C:\Users\FARMDAR\Desktop\Abbas\Data\Asml\Data For Script Running\tehsil.geojson"
Enter Survey Title Of This Boundary
tehsil
Enter Path of Boundary File No  3
"C:\Users\FARMDAR\Desktop\Abbas\Data\Asml\Data For Script Running\uc.geojson"
Enter Survey Title Of This Boundary
uc
CPU times: total: 250 ms
Wall

In [568]:
# Read and merge the model files, then split them into one frame per crop id
dfs_out = report_tileset_creation()

In [569]:
# Checking the dataframes

crop_id_to_name_dic

{0: 'urban',
 1: 'sugarcane',
 2: 'cotton',
 3: 'maize',
 4: 'others',
 5: 'orchards',
 6: 'juaar',
 7: 'rice',
 8: 'other vegetation',
 9: 'chilli',
 10: 'mustard',
 11: 'canola',
 12: 'banana',
 13: 'tobacco',
 14: 'wheat',
 15: 'tomato',
 16: 'mountain',
 17: 'CP-77400',
 18: 'CPF-237',
 19: 'CPF-246',
 20: 'CPF-253',
 21: 'HSF-240',
 22: 'HSF-242',
 23: 'NSG-59',
 24: 'Other Variety',
 25: 'Ponda',
 26: 'SPF-234',
 100: 'esurvey',
 77: 'Stress',
 78: 'Potential Stress',
 79: 'No Stress',
 80: 'High Vigour',
 73: 'Low Vegetation',
 74: 'Moderate',
 75: 'Good',
 76: 'Excellent',
 83: 'Low',
 84: 'Medium',
 85: 'High',
 88: 'Low',
 87: 'Medium',
 86: 'High',
 81: 'Remaining Sugarcane',
 82: 'Harvested',
 101: 'Grain',
 102: 'Silage',
 61: 'January',
 62: 'February',
 63: 'March',
 64: 'April',
 65: 'May',
 66: 'June',
 67: 'July',
 68: 'August',
 69: 'September',
 70: 'October',
 71: 'November',
 72: 'December'}

In [570]:
%%time
# Calling the intersect caller for every per-crop frame
# ans_dfs is a 2d list: first index is the crop type, second index is the boundary type

ans_dfs = [intersect_caller(df) for df in dfs_out]

CPU times: total: 25.7 s
Wall time: 43.7 s


In [571]:
# Joining Dataframes
# For every boundary layer j, merge the per-crop results ans_dfs[i][j] into one frame ans[j]

# Start from copies of the first crop's frames (one per boundary layer)
ans = [boundary_df.copy() for boundary_df in ans_dfs[0]]

# Columns of the first frame are the candidate merge keys
merge_columns = list(ans[0].columns)

# Merge the frames of every further crop into the matching boundary layer
for crop_dfs in ans_dfs[1:]:
    for j, crop_df in enumerate(crop_dfs):
        # Merge keys: candidate columns that the current frame also has
        common_columns = list(set(merge_columns) & set(crop_df.columns))

        ans[j] = ans[j].merge(crop_df, on=common_columns, how='inner')

# ans now contains one merged DataFrame per boundary layer


# Removing duplicated columns created by the merges

for i in range(len(ans)):

    # Right-hand duplicates carry a '_y' (or '_z') suffix: drop them
    columns_to_drop = [col for col in ans[i].columns if col.endswith(('_y', '_z'))]
    ans[i] = ans[i].drop(columns=columns_to_drop)

    # Left-hand duplicates carry a '_x' suffix: remove exactly that suffix.
    # (str.strip('_x') would remove ANY leading/trailing '_' or 'x' characters,
    # e.g. turn 'index' into 'inde'.)
    ans[i].columns = [col[:-2] if col.endswith('_x') else col for col in ans[i].columns]

    # Keep the first occurrence of any column name that is now repeated
    ans[i] = ans[i].loc[:, ~ans[i].columns.duplicated()]
    
    

In [572]:
ans[1].head()

Unnamed: 0,Boundary Name,id,esurvey_area,geometry,Good_area,Moderate_area,Excellent_area,Low Vegetation_area
0,Ahmadpur East,4,2191,"MULTIPOLYGON (((71.37218 29.39407, 71.37584 29...",1434,155,390,125
1,Bahawalpur,2,17619,"MULTIPOLYGON (((71.48697 29.38430, 71.48778 29...",13783,2123,1211,1411
2,Bahawalpur City,3,1091,"MULTIPOLYGON (((71.72079 29.40487, 71.72461 29...",745,94,143,105
3,Yazman,1,1977,"MULTIPOLYGON (((71.28842 29.12464, 71.29091 29...",1615,243,301,194


In [573]:
# Building the total_stats dictionary (every value is stored as a string)

total_stats = {}

# Finding Total Area
Total_Area = 0


# The boundary frame with exactly one row is treated as the AOI; Total_Area
# stays 0 when no such frame exists (i.e. this does not work without an AOI).
# NOTE(review): if several boundary frames have a single row, the last one wins.
for df in ans:
    
        if len(df) == 1 : 
        
            # Area is measured in the estimated UTM CRS and divided by the acre constant
            estimated_utm_crs = df.estimate_utm_crs().to_string()
            df = df.to_crs(estimated_utm_crs)
            Total_Area = int((df.area / 4046.8564224).sum())


total_stats['Total Area'] = str(Total_Area)
    
# Finding Total Growers: number of distinct 'passbook' values in the raw input
# frames; when several frames have a 'passbook' column the last one wins
growers = 0 

for df in dfs:
    if 'passbook' in df.columns:
        growers = str(df['passbook'].nunique())

total_stats['Total Growers'] = str(growers)


# Finding Total Esurvey: area of the raw input frame whose first 'predicted'
# value is 100 (the 'esurvey' id), or 'N/A' when the first boundary frame has
# no 'esurvey_area' column

esurvey = 0

if 'esurvey_area' not in list(ans[0].columns):
    esurvey = 'N/A'
else:
    for df in dfs:
        if df['predicted'].iloc[0] == '100' or df['predicted'].iloc[0] == 100 :
            estimated_utm_crs = df.estimate_utm_crs().to_string()
            df = df.to_crs(estimated_utm_crs)
            esurvey = ((df.area / 4046.8564224).round(2)).sum().round(2)
            
total_stats['Total Esurvey'] = str(esurvey)


# Finding Total Crop Area: sum of every '*_area' column of the single-row
# (AOI) boundary frame.
# NOTE(review): 'esurvey_area' also ends with '_area', so it is included in
# this sum when present, whereas json_creator() leaves it out of 'Crop Area'
# -- confirm that this is intended.

total_crop_area = 0
for df in ans:
    
    if len(df) == 1 :
        
        for i in df.columns:
            if i.endswith('_area'):
                total_crop_area += df[i].sum()
                
        
total_stats['Total Crop Area'] = str(total_crop_area)
                
total_stats

{'Total Area': '290951',
 'Total Growers': '2945',
 'Total Esurvey': '22987.13',
 'Total Crop Area': '46958'}

In [574]:
ans[0]['Boundary Name'].dtype

dtype('O')

In [575]:
list(ans[0].columns)

['Boundary Name',
 'id',
 'esurvey_area',
 'geometry',
 'Good_area',
 'Moderate_area',
 'Excellent_area',
 'Low Vegetation_area']

In [576]:
# GeoJSON representation (parsed into a dict) of every merged boundary frame
bound_json = [json.loads(boundary_df.to_json()) for boundary_df in ans]


In [577]:
# Saving every merged boundary frame as a GeoJSON file named
# "<survey title> <user name> <report type> <date>.geojson"
for idx, boundary_df in enumerate(ans):
    geojson_name = f'{survey_titles[idx]} {user_name} {report_type} {date}.geojson'
    boundary_df.to_file(geojson_name, driver='GeoJSON')

In [578]:
ans[1]

Unnamed: 0,Boundary Name,id,esurvey_area,geometry,Good_area,Moderate_area,Excellent_area,Low Vegetation_area
0,Ahmadpur East,4,2191,"MULTIPOLYGON (((71.37218 29.39407, 71.37584 29...",1434,155,390,125
1,Bahawalpur,2,17619,"MULTIPOLYGON (((71.48697 29.38430, 71.48778 29...",13783,2123,1211,1411
2,Bahawalpur City,3,1091,"MULTIPOLYGON (((71.72079 29.40487, 71.72461 29...",745,94,143,105
3,Yazman,1,1977,"MULTIPOLYGON (((71.28842 29.12464, 71.29091 29...",1615,243,301,194


In [579]:
def json_creator(boundary_data,bound_df,survey_title,date,verbose=True):
    """Build the survey JSON object for one boundary layer.

    Reads the module-level ``report_type``, ``crop_id_to_name_dic`` and (for
    'Crop Scan' reports) ``crop``.

    Args:
        boundary_data: GeoJSON-like dict with a 'features' list; every feature
            needs 'properties' (one entry per non-geometry column of
            ``bound_df``) and 'geometry' with 'type' and 'coordinates'.
        bound_df: Frame whose columns define which properties are exported;
            it must have a 'geometry' column, which is skipped.
        survey_title: Title stored under 'survey_title'.
        date: Key under which the statistics are stored in 'agg_stats'.
        verbose: When True (default) the resulting object is also printed as
            indented JSON.

    Returns:
        dict with the keys
        'survey_title', 'agg_stats' (``{date: {report_type: {feature id:
        {class label: area}}}}``) and 'geometry' (one entry per feature with
        its 'properties' and 'geometry').

    Raises:
        KeyError: if a feature has no 'id' property, or -- for 'Crop Scan'
            reports -- if ``crop`` is not one of the title-cased class labels
            found for the feature.
    """

    date_key = date
    crop_scan_report_type = report_type

    desired_json_object = {
        'survey_title': survey_title,
        'agg_stats': {
            date_key: {
                crop_scan_report_type: {},
            }
        },
        'geometry': []
    }

    bound_cols = list(bound_df.columns)
    bound_cols.remove('geometry')

    # Labels that mark a '<label>_area' column as a per-class statistic
    known_labels = set(crop_id_to_name_dic.values())

    # Iterate through features and add entries to the 'agg_stats' dictionary
    for feature in boundary_data['features']:

        crop_scan_entry = {}  # per-class areas of this feature
        prop_dic = {}         # every other property of this feature

        for col in bound_cols:
            base_name = col.replace("_area", "")
            value = feature['properties'][col]

            # 'esurvey' is a known label too, but its area is reported as a
            # plain property rather than as a class statistic
            if col != 'esurvey_area' and base_name in known_labels:
                crop_scan_entry[base_name.title()] = value
            else:
                key = base_name.title()
                if key == 'Id':
                    key = 'id'
                prop_dic[key] = value

        # Geometry type is taken from the feature itself, so a layer that
        # mixes Polygon and MultiPolygon features is described correctly
        geometry = {
            'type': feature['geometry']['type'],
            'coordinates': feature['geometry']['coordinates']
        }

        desired_json_object['agg_stats'][date_key][crop_scan_report_type][prop_dic['id']] = crop_scan_entry

        # 'Crop Area': the selected crop for 'Crop Scan' reports, otherwise
        # the sum over all classes of the feature
        if report_type == 'Crop Scan':
            prop_dic['Crop Area'] = crop_scan_entry[crop]
        else:
            c_area = 0

            for class_area in crop_scan_entry.values():
                c_area += class_area

            prop_dic['Crop Area'] = c_area

        # 'Esurvey' is exported as 'Esurvey Area' ('-' when not available)
        if 'Esurvey' not in prop_dic.keys():
            prop_dic['Esurvey'] = '-'

        prop_dic['Esurvey Area'] = prop_dic['Esurvey']
        del prop_dic['Esurvey']

        # Add the geometry information directly to the JSON object
        desired_json_object['geometry'].append({
            'type': 'FeatureCollection',
            'properties' : prop_dic,
            'geometry': geometry
        })

    if verbose:
        # Print the object as an indented JSON string
        print(json.dumps(desired_json_object, indent=2))

    return desired_json_object




In [580]:
# One survey JSON object per boundary layer, in the order of `ans`
json_obj = [
    json_creator(bound_json[idx], ans[idx], survey_titles[idx], date)
    for idx in range(len(ans))
]

json_obj[0]

{
  "survey_title": "aoi",
  "agg_stats": {
    "2023-11-11": {
      "Plant Health": {
        "1": {
          "Good": 17578,
          "Moderate": 2617,
          "Excellent": 2046,
          "Low Vegetation": 1837
        }
      }
    }
  },
  "geometry": [
    {
      "type": "FeatureCollection",
      "properties": {
        "Boundary Name": "Ashraf",
        "id": 1,
        "Crop Area": 24078,
        "Esurvey Area": 22880
      },
      "geometry": {
        "type": "MultiPolygon",
        "coordinates": [
          [
            [
              [
                71.6549666504118,
                29.444890961263695,
                0.0
              ],
              [
                71.6555677255676,
                29.444914079538897,
                0.0
              ],
              [
                71.6624107350332,
                29.4444979505849,
                0.0
              ],
              [
                71.6748524589073,
                29.442877377170603,

{'survey_title': 'aoi',
 'agg_stats': {'2023-11-11': {'Plant Health': {1: {'Good': 17578,
     'Moderate': 2617,
     'Excellent': 2046,
     'Low Vegetation': 1837}}}},
 'geometry': [{'type': 'FeatureCollection',
   'properties': {'Boundary Name': 'Ashraf',
    'id': 1,
    'Crop Area': 24078,
    'Esurvey Area': 22880},
   'geometry': {'type': 'MultiPolygon',
    'coordinates': [[[[71.6549666504118, 29.444890961263695, 0.0],
       [71.6555677255676, 29.444914079538897, 0.0],
       [71.6624107350332, 29.4444979505849, 0.0],
       [71.6748524589073, 29.442877377170603, 0.0],
       [71.6877046477304, 29.441274232946903, 0.0],
       [71.6991289890308, 29.439095023334698, 0.0],
       [71.7054963737345, 29.438157207964707, 0.0],
       [71.7179851432925, 29.4300919014613, 0.0],
       [71.7207948678857, 29.404870682545404, 0.0],
       [71.7354086917598, 29.350682943268495, 0.0],
       [71.7435974568011, 29.269529164760893, 0.0],
       [71.6937900521256, 29.237450515231284, 0.0],
 

In [581]:
json_obj

[{'survey_title': 'aoi',
  'agg_stats': {'2023-11-11': {'Plant Health': {1: {'Good': 17578,
      'Moderate': 2617,
      'Excellent': 2046,
      'Low Vegetation': 1837}}}},
  'geometry': [{'type': 'FeatureCollection',
    'properties': {'Boundary Name': 'Ashraf',
     'id': 1,
     'Crop Area': 24078,
     'Esurvey Area': 22880},
    'geometry': {'type': 'MultiPolygon',
     'coordinates': [[[[71.6549666504118, 29.444890961263695, 0.0],
        [71.6555677255676, 29.444914079538897, 0.0],
        [71.6624107350332, 29.4444979505849, 0.0],
        [71.6748524589073, 29.442877377170603, 0.0],
        [71.6877046477304, 29.441274232946903, 0.0],
        [71.6991289890308, 29.439095023334698, 0.0],
        [71.7054963737345, 29.438157207964707, 0.0],
        [71.7179851432925, 29.4300919014613, 0.0],
        [71.7207948678857, 29.404870682545404, 0.0],
        [71.7354086917598, 29.350682943268495, 0.0],
        [71.7435974568011, 29.269529164760893, 0.0],
        [71.6937900521256, 29.2

In [582]:
%%time
# Final JSON: a one-element list holding the run metadata, the overall
# statistics and the per-boundary survey objects
survey_record = {
    'user_name': user_name,
    'survey_season': season,
    'crop' : crop,
    'total_stats' : total_stats,
    'survey_array': list(json_obj),
}

surveys = [survey_record]

CPU times: total: 0 ns
Wall time: 0 ns


In [583]:
# Write `surveys` to "<user name> <report type> <date>.json" in the working directory
output_name = f'{user_name} {report_type} {date}.json'

with open(output_name, 'w') as json_file:
    json.dump(surveys, json_file, indent=2)

print("your json file has been created.")

your json file has been created.
