In [1]:
import pandas as pd
from geopy.distance import geodesic
import llama_cpp
import geopandas
from shapely.geometry import Point
import os

In [2]:
# Observation table and the dummy Janes point-of-interest (POI) table.
df_data=pd.read_csv("../datasets/data.csv")
df_dummy_janes=pd.read_csv("../datasets/dummy_janes_POI.csv")

# EEZ boundary file; every geometry in it is merged into one shape so a single
# containment test per location is enough later on.
eez_indian_boundaries_path="../datasets/india_eez.json"
india_eez_gdf = geopandas.read_file(eez_indian_boundaries_path)
# `unary_union` is deprecated since GeoPandas 1.0 (it emitted a warning when this
# cell ran); `union_all()` is its replacement. Older installs that lack the new
# method keep using the legacy attribute.
if hasattr(india_eez_gdf, "union_all"):
    eez_geometry=india_eez_gdf.union_all()
else:
    eez_geometry=india_eez_gdf.unary_union

  eez_geometry=india_eez_gdf.unary_union


In [3]:
# Collapse each row's latitude and longitude into one (latitude, longitude)
# tuple so the helpers below can work on a single 'location' value per row.
df_data['location']=[
    (lat, lon) for lat, lon in zip(df_data['latitude'], df_data['longitude'])
]
df_dummy_janes['location']=[
    (lat, lon) for lat, lon in zip(df_dummy_janes['latitude'], df_dummy_janes['longitude'])
]

In [4]:
def distance_between_locations(target_location, POI_location):
    '''
    Geodesic distance, in kilometres, between a target and a point of interest (POI).

    input: target_location and POI_location, both handed unchanged to geopy's
           geodesic() (this notebook supplies (latitude, longitude) tuples)
    output: distance between the two locations, in kilometres
    '''
    return geodesic(target_location, POI_location).kilometers

In [5]:
def find_closest_poi(target_location, poi_table=None):
    '''
    Finds the point of interest (POI) nearest to a target and returns its name.

    input: target_location - location of the target, in the same form as the
               entries of the 'location' column (this notebook builds them as
               (latitude, longitude) tuples)
           poi_table - optional DataFrame with 'location' and 'location_name'
               columns; defaults to the notebook-level df_dummy_janes
    output: 'location_name' of the nearest POI (the first one wins on a tie),
            or None when the table has no rows
    '''
    if poi_table is None:
        poi_table=df_dummy_janes

    # Walk the two needed columns directly; iterrows() would build a full
    # Series for every row only to read two fields from it.
    distances_and_names=(
        (distance_between_locations(target_location, poi_location), poi_name)
        for poi_location, poi_name in zip(poi_table['location'], poi_table['location_name'])
    )

    # key= keeps the names out of the comparison, min() returns the first of
    # several equal minima, and default= covers an empty table.
    _, closest_poi=min(
        distances_and_names,
        key=lambda pair: pair[0],
        default=(float('inf'), None),
    )
    return closest_poi
    

In [6]:
# Tag every observation with the name of its nearest point of interest.
df_data['closest_point_of_mil_interest']=(
    df_data['location'].apply(find_closest_poi)
)

In [7]:
def is_location_in_indian_waters(target_location):
    '''
    Labels a location as inside or outside the notebook-level eez_geometry.

    input: target_location - indexable pair read as (latitude, longitude)
    output: "Inside Indian Waters" when eez_geometry contains the point,
            otherwise "Outside Indian Waters"
    '''
    latitude, longitude=target_location[0], target_location[1]
    # The point is built with longitude first and latitude second (x, y order).
    inside=eez_geometry.contains(Point(longitude, latitude))
    return "Inside Indian Waters" if inside else "Outside Indian Waters"
    
    

In [8]:
# Tag every observation with its inside/outside label relative to the EEZ geometry.
df_data['location_wrt_naval_borders']=(
    df_data['location'].apply(is_location_in_indian_waters)
)

In [9]:
# Show the column layout of both tables; the custom `end` reproduces the
# space-plus-blank-line separator between the two listings.
print(df_data.columns, end=" \n\n")
print(df_dummy_janes.columns)

Index(['id', 'name', 'latitude', 'longitude', 'range', 'bearing', 'course',
       'speed', 'altitude', 'depth', 'reported_by', 'comment', 'hostility',
       'category', 'nationality', 'location_wrt_naval_borders',
       'closest_point_of_mil_interest', 'time', 'location'],
      dtype='object') 

Index(['latitude', 'longitude', 'location_name', 'location_country',
       'location'],
      dtype='object')


In [10]:
# Dump the POI table followed by the observation table, one after the other.
print(df_dummy_janes, df_data, sep="\n")

     latitude  longitude                             location_name  \
0   18.939300  72.844500                                    mumbai   
1   19.005600  72.816300       jawaharlal nehru port (nhava sheva)   
2   17.685600  83.216000                             visakhapatnam   
3   15.411207  73.799978                            mormugao (goa)   
4   13.081500  80.292100                                   chennai   
5   21.733300  87.447800                                   paradip   
6   22.546100  88.314900          kolkata (syama prasad mookerjee)   
7    9.967800  76.280100                            kochi (cochin)   
8   12.967400  74.806600                  new mangalore (panambur)   
9    8.766700  78.133300             tuticorin (v.o.chidambaranar)   
10  13.191100  80.292100                        ennore (kamarajar)   
11  11.623900  92.730300                                port blair   
12  24.835600  66.981400                              karachi port   
13  24.785000  67.05

In [11]:
# Persist the enriched observation table; the row index is not written out.
df_data.to_csv(
    path_or_buf="../datasets/data_processed.csv",
    index=False,
)

In [12]:
# Print the observation table's column index (keys() on a DataFrame is its columns).
print(df_data.keys())

Index(['id', 'name', 'latitude', 'longitude', 'range', 'bearing', 'course',
       'speed', 'altitude', 'depth', 'reported_by', 'comment', 'hostility',
       'category', 'nationality', 'location_wrt_naval_borders',
       'closest_point_of_mil_interest', 'time', 'location'],
      dtype='object')
