In [2]:
import s3fs
import pandas as pd

path = "IHS/ship_data.csv"
bucket = "projet-hackathon-un-2022"
# Key of the ship-type code table. The substitution targets the file stem only,
# instead of replacing the bare word "data" across the whole bucket/key string.
codes_path = path.replace("ship_data", "ship_codes")

# S3-compatible filesystem client pointed at the configured endpoint.
fs = s3fs.S3FileSystem(
  client_kwargs={'endpoint_url': 'https://minio.lab.sspcloud.fr'}
)

# Read both CSV files inside context managers so the remote file handles
# are closed as soon as pandas has finished parsing them.
with fs.open(f'{bucket}/{path}', mode='rb') as ship_data_file:
    ship_data = pd.read_csv(ship_data_file)
with fs.open(f'{bucket}/{codes_path}', mode='rb') as ship_codes_file:
    ship_codes = pd.read_csv(ship_codes_file)

In [3]:
# Inner-join the ship records with the code table on their shared StatCode5 key.
ship_data_enriched = pd.merge(ship_data, ship_codes, on="StatCode5")
# Relative frequency of each level-1 ship type in the joined table.
ship_data_enriched.value_counts(subset="ShipTypeLevel1", normalize=True)

ShipTypeLevel1
Cargo Carrying                 0.508045
Work Vessel                    0.419286
Non Merchant                   0.037187
Non Seagoing Merchant Ships    0.014563
Non Propelled                  0.012390
Non Ship Structure             0.008528
dtype: float64

In [5]:
# Load the AIS parquet file; the context manager closes the remote handle
# once pandas has read it.
with fs.open(f'{bucket}/AIS/ais_azov_black_20220001_20220007.parquet',
             mode='rb') as ais_file:
    AIS = pd.read_parquet(ais_file)

# Nombre de bateaux

In [6]:
# Relative frequency of each level-3 type among ships whose level-1 type
# is "Cargo Carrying".
(
    ship_data_enriched
    .loc[ship_data_enriched["ShipTypeLevel1"].eq("Cargo Carrying")]
    .value_counts(subset="ShipTypeLevel3", normalize=True)
)

ShipTypeLevel3
General Cargo                0.325074
Bulk Dry                     0.169043
Oil                          0.150646
Container                    0.074034
Chemical                     0.067596
Passenger                    0.050659
Ro-Ro Cargo                  0.037783
Passenger/Ro-Ro Cargo        0.037624
Liquefied Gas                0.028497
Other Bulk Dry               0.019434
Refrigerated Cargo           0.017081
Passenger / General Cargo    0.007459
Other Dry Cargo              0.006167
Bulk Dry / Oil               0.003726
Other Liquids                0.002673
Self Discharging Bulk Dry    0.002505
dtype: float64

In [7]:
# Keep only the rows whose level-1 type is "Cargo Carrying".
ship_data_enriched = ship_data_enriched.loc[
    ship_data_enriched["ShipTypeLevel1"].eq("Cargo Carrying"),
    :,
]

In [8]:
# Restrict the table to the level-3 ship types listed below.
ship_data_enriched = ship_data_enriched.loc[
    ship_data_enriched["ShipTypeLevel3"].isin(
        [
            "General Cargo",
            "Bulk Dry",
            "Container",
            "Other Bulk Dry",
            "Refrigerated Cargo",
            "Other Dry Cargo",
            "Bulk Dry / Oil",
            "Self Discharging Bulk Dry",
        ]
    ),
    :,
]

In [9]:
# Preview the first rows of the filtered ship table.
ship_data_enriched.head()

Unnamed: 0,StatCode5,MaritimeMobileServiceIdentityMMSINumber,ShipStatusEffectiveDate,ShiptypeLevel5,LRIMOShipNo,FuelConsumptionTotal,GrossTonnage,NetTonnage,ShipTypeLevel1,ShipTypeLevel2,ShipTypeLevel3,ShipTypeLevel4,ShipTypeLevel5,SubGroup,SubType
87293,A31A2GX,374511000.0,19810101,General Cargo Ship,2629551,0.0,192,57,Cargo Carrying,Dry Cargo/Passenger,General Cargo,General Cargo Ship,General Cargo Ship,General Cargo,General Cargo
87294,A31A2GX,,20200101,General Cargo Ship,4625638,0.0,3040,912,Cargo Carrying,Dry Cargo/Passenger,General Cargo,General Cargo Ship,General Cargo Ship,General Cargo,General Cargo
87295,A31A2GX,,19831216,General Cargo Ship,4625640,0.0,3040,912,Cargo Carrying,Dry Cargo/Passenger,General Cargo,General Cargo Ship,General Cargo Ship,General Cargo,General Cargo
87296,A31A2GX,,20200101,General Cargo Ship,4625652,0.0,3040,912,Cargo Carrying,Dry Cargo/Passenger,General Cargo,General Cargo Ship,General Cargo Ship,General Cargo,General Cargo
87297,A31A2GX,,20200101,General Cargo Ship,4625664,0.0,3040,912,Cargo Carrying,Dry Cargo/Passenger,General Cargo,General Cargo Ship,General Cargo Ship,General Cargo,General Cargo


In [10]:
# Number of remaining ships per level-4 ship type.
ship_data_enriched["ShipTypeLevel4"].value_counts()

General Cargo Ship               39960
Bulk Carrier                     20379
Container Ship                    9275
Refrigerated Cargo Ship           2141
Aggregates Carrier                1204
Cement Carrier                     818
Ore Carrier                        810
Deck Cargo Ship                    717
Livestock Carrier                  388
Wood Chips Carrier                 321
Heavy Load Carrier                 320
Self Discharging Bulk Carrier      314
Ore/Oil Carrier                    252
Bulk/Oil Carrier                   215
Limestone Carrier                   75
Palletised Cargo Ship               70
Barge Carrier                       39
Nuclear Fuel Carrier                23
Powder Carrier                       8
Urea Carrier                         8
Passenger/Container Ship             5
Pulp Carrier                         3
Refined Sugar Carrier                2
Name: ShipTypeLevel4, dtype: int64

In [11]:
# Dimensions (rows, columns) of the AIS frame.
# NOTE(review): an earlier note on this line read "179 988", which does not
# match the output recorded for this cell, (1543295, 9).
AIS.shape

(1543295, 9)

In [12]:
# Shape of the subset of AIS rows whose mmsi is missing.
AIS[AIS["mmsi"].isna()].shape

(0, 9)

In [13]:
# Number of AIS rows per mmsi value, most frequent first.
AIS["mmsi"].value_counts()

272683000    2274
214182724    2069
264900201    1951
271230300    1924
271045807    1911
             ... 
271046450       1
271043013       1
271041417       1
271046709       1
244830391       1
Name: mmsi, Length: 3253, dtype: int64

In [14]:
# Left join: every AIS row is kept, and ship attributes are filled in where
# the AIS mmsi equals the MMSI number recorded in the ship table.
AIS_enriched = pd.merge(
    AIS,
    ship_data_enriched,
    how="left",
    left_on="mmsi",
    right_on="MaritimeMobileServiceIdentityMMSINumber",
)

In [15]:
# Preview the first rows of the AIS frame after the left join with the ship table.
AIS_enriched.head()

Unnamed: 0,hex_resolution,longitude,mmsi,destination,eeid,H3_int_index_8,latitude,dt_insert_utc,polygon_name,StatCode5,...,FuelConsumptionTotal,GrossTonnage,NetTonnage,ShipTypeLevel1,ShipTypeLevel2,ShipTypeLevel3,ShipTypeLevel4,ShipTypeLevel5,SubGroup,SubType
0,8,31.408333,272157700,MYKOLAIV,5191743282127358980,613021963599740927,47.526667,2022-01-01 05:58:49,Polygon,,...,,,,,,,,,,
1,8,31.331667,272157700,MYKOLAIV,5191743282127358980,613021966030340095,47.535,2022-01-01 18:25:52,Polygon,,...,,,,,,,,,,
2,8,31.331667,272157700,MYKOLAIV,5191743282127358980,613021966030340095,47.535,2022-01-01 21:14:30,Polygon,,...,,,,,,,,,,
3,8,31.333333,272157700,MYKOLAIV,5191743282127358980,613021966030340095,47.535,2022-01-01 07:46:57,Polygon,,...,,,,,,,,,,
4,8,31.333333,272157700,MYKOLAIV,5191743282127358980,613021966030340095,47.535,2022-01-01 09:43:50,Polygon,,...,,,,,,,,,,


In [25]:
# Dimensions (rows, columns) of the joined frame.
# NOTE(review): the output recorded for this cell looks stale. It shows 23 columns,
# while the column listing in the following cell has 24 entries, and the
# execution count is out of order. Re-run on a fresh kernel to confirm.
AIS_enriched.shape

(179988, 23)

In [16]:
# List the column names of the joined frame.
AIS_enriched.columns

Index(['hex_resolution', 'longitude', 'mmsi', 'destination', 'eeid',
       'H3_int_index_8', 'latitude', 'dt_insert_utc', 'polygon_name',
       'StatCode5', 'MaritimeMobileServiceIdentityMMSINumber',
       'ShipStatusEffectiveDate', 'ShiptypeLevel5', 'LRIMOShipNo',
       'FuelConsumptionTotal', 'GrossTonnage', 'NetTonnage', 'ShipTypeLevel1',
       'ShipTypeLevel2', 'ShipTypeLevel3', 'ShipTypeLevel4', 'ShipTypeLevel5',
       'SubGroup', 'SubType'],
      dtype='object')

In [20]:
# Load the semicolon-separated lookup table with the "Digit" and "Allocated to" columns.
# on_bad_lines="warn" replaces the deprecated error_bad_lines=False and keeps its
# behaviour: malformed rows are skipped and reported with a warning.
# The context manager closes the remote file handle after parsing.
with fs.open(f'{bucket}/AIS/mmid.csv', mode='rb') as mmid_file:
    mmsi_number = pd.read_csv(mmid_file, on_bad_lines="warn",
                              encoding='iso-8859-1', sep=";")



  mmsi_number = pd.read_csv(fs.open(f'{bucket}/AIS/mmid.csv',


In [21]:
# Preview the first rows of the lookup table.
mmsi_number.head()

Unnamed: 0,Digit,Allocated to
0,201,Albania (Republic of)
1,202,Andorra (Principality of)
2,203,Austria
3,204,Portugal - Azores
4,205,Belgium


In [25]:
# Store the first three characters of each mmsi, as text, in a "Digit" column.
AIS_enriched["Digit"] = AIS_enriched["mmsi"].astype(str).str.slice(start=0, stop=3)

In [24]:
# Check the three-character mmsi prefix. It is stored in the "Digit" column;
# no visible cell creates a "mmsi_digits" column, so reading that name raises
# KeyError on a fresh kernel.
AIS_enriched["Digit"].head()

0    272
1    272
2    272
3    272
4    272
Name: mmsi_digits, dtype: object

In [29]:
# Cast the lookup key to str so that it can be merged on "Digit" with
# AIS_enriched, whose "Digit" column is built from string slices.
mmsi_number["Digit"] = mmsi_number["Digit"].astype(str)

In [30]:
# Left join on "Digit": every AIS row is kept, and the lookup table's
# "Allocated to" value is added where the prefix is found.
AIS_enriched2 = AIS_enriched.merge(
    mmsi_number,
    how="left",
    on=["Digit"],
)

In [31]:
# Preview the first rows after adding the "Allocated to" lookup column.
AIS_enriched2.head()

Unnamed: 0,hex_resolution,longitude,mmsi,destination,eeid,H3_int_index_8,latitude,dt_insert_utc,polygon_name,StatCode5,...,ShipTypeLevel1,ShipTypeLevel2,ShipTypeLevel3,ShipTypeLevel4,ShipTypeLevel5,SubGroup,SubType,mmsi_digits,Digit,Allocated to
0,8,31.408333,272157700,MYKOLAIV,5191743282127358980,613021963599740927,47.526667,2022-01-01 05:58:49,Polygon,,...,,,,,,,,272,272,Ukraine
1,8,31.331667,272157700,MYKOLAIV,5191743282127358980,613021966030340095,47.535,2022-01-01 18:25:52,Polygon,,...,,,,,,,,272,272,Ukraine
2,8,31.331667,272157700,MYKOLAIV,5191743282127358980,613021966030340095,47.535,2022-01-01 21:14:30,Polygon,,...,,,,,,,,272,272,Ukraine
3,8,31.333333,272157700,MYKOLAIV,5191743282127358980,613021966030340095,47.535,2022-01-01 07:46:57,Polygon,,...,,,,,,,,272,272,Ukraine
4,8,31.333333,272157700,MYKOLAIV,5191743282127358980,613021966030340095,47.535,2022-01-01 09:43:50,Polygon,,...,,,,,,,,272,272,Ukraine


In [37]:
# Fraction of rows without a destination (recorded run: 377 313 rows, about 24 %).
int(AIS_enriched2["destination"].isna().sum()) / len(AIS_enriched2)

0.24448533818874552

In [34]:
# Shape of the subset of rows that have a destination (recorded run: 1 165 982 rows).
AIS_enriched2[AIS_enriched2["destination"].notna()].shape

(1165982, 27)