In [1]:
import pandas as pd
import re

# Path of the raw losses export
file_path = 'russia_losses.csv'

# Load the CSV and discard the 'lost_by' and 'unit' columns in a single chain
losses_df = pd.read_csv(file_path).drop(['lost_by', 'unit'], axis=1)

# Preview the top of the resulting frame
losses_df.head()

Unnamed: 0,id,type,model,status,date,nearest_location,geo,tags
0,1,Tanks,T-64BV,Destroyed,2022-03-14,"Rubizhne, Sievierodonetsk raion","49.027241,38.343374",
1,2,Tanks,T-64BV,Destroyed,2022-03-16,"Mariupol, Mariupol raion","47.099125851628806,37.52371337039075","Turretless, Z, Mine plow/roller"
2,3,Tanks,T-64BV,Destroyed,2022-03-16,"Mariupol, Mariupol raion","47.09869256359657,37.52353235165147",Shattered
3,5,Tanks,T-64BV,Destroyed,2022-03-17,"Mariupol, Mariupol raion","47.098139835697424,37.640174323260645","Turretless, Z"
4,6,Tanks,T-64BV,Destroyed,2022-03-31,"Rubizhne, Sievierodonetsk raion","49.01122,38.39844",Z


In [2]:
# Pre-create the location columns so they exist before they are populated
for new_column in ('town', 'raion', 'oblast', 'front'):
    losses_df[new_column] = None

# A parenthesised note together with any whitespace directly in front of it,
# e.g. the " (Kharkiv oblast)" in "Town (Kharkiv oblast), X raion".
# One shared pattern is used for both extraction and removal.
_PAREN_NOTE = re.compile(r'\s*\((.*?)\)')


def _first_paren_note(value):
    """Return the text inside the first '(...)' of `value`, or None when absent/missing."""
    if pd.isna(value):
        return None
    found = _PAREN_NOTE.search(str(value))
    return found.group(1) if found else None


def _without_paren_notes(value):
    """Return `value` with every '(...)' note removed; missing values pass through.

    The whitespace in front of each note is removed with it and the result is
    stripped, so "Town (X), Y raion" becomes "Town, Y raion" rather than
    "Town , Y raion" and "Town (X)" becomes "Town" rather than "Town ".
    """
    if pd.isna(value):
        return value
    return _PAREN_NOTE.sub('', str(value)).strip()


# Keep the first parenthesised note in its own column 'extra' ...
losses_df['extra'] = losses_df['nearest_location'].map(_first_paren_note)

# ... and remove all parenthesised notes from 'nearest_location'
losses_df['nearest_location'] = losses_df['nearest_location'].map(_without_paren_notes)

In [3]:
# Split 'nearest_location' on commas; only parts 0 and 1 are classified below
location = losses_df['nearest_location'].str.split(',', expand=True)
for part_no in (0, 1):
    # expand=True yields fewer columns when no value contains a comma;
    # make sure both parts can be addressed without a KeyError
    if part_no not in location.columns:
        location[part_no] = None

# Substrings (compared against the lower-cased text) that decide which column a part feeds.
RAION_WORDS = (' raion',)
# ' crimea' is written without a trailing space for both parts so the first-part
# check agrees with the second-part check.
OBLAST_WORDS = (' oblast', ' krai', ' crimea', ' sea')
# NOTE(review): the first part accepts any 'ukraine', the second only 'east ukraine'.
# Kept as found - confirm whether the asymmetry is intended.
FRONT_WORDS_FIRST = ('ukraine', 'russia')
FRONT_WORDS_SECOND = ('east ukraine', 'russia')
# NOTE(review): ' crimea' / ' sea' are not excluded here, so such values are stored
# both as 'oblast' and as 'town'. Kept as found.
NOT_TOWN_WORDS = (' raion', ' oblast', ' krai', 'ukraine', 'russia')
EXTRA_OBLAST_WORDS = (' oblast', ' krai')


def _select(parts, any_of=None, none_of=()):
    """Filter a Series of location fragments by keyword.

    A value is kept when it is not missing, contains at least one substring from
    `any_of` (skipped when `any_of` is None) and contains none of `none_of`.
    Kept values are stripped on both sides; values that are empty after
    stripping, and all rejected values, become None.
    """
    def keep(value):
        if pd.isna(value):
            return None
        text = str(value).lower()
        if any_of is not None and not any(word in text for word in any_of):
            return None
        if any(word in text for word in none_of):
            return None
        # Empty strings are treated as missing so they never count as a place name
        return str(value).strip() or None

    return parts.apply(keep)


first_part, second_part = location[0], location[1]

# The first part wins; the second part only fills rows the first left empty
losses_df['raion'] = _select(first_part, RAION_WORDS).combine_first(
    _select(second_part, RAION_WORDS))
losses_df['oblast'] = (
    _select(first_part, OBLAST_WORDS)
    .combine_first(_select(second_part, OBLAST_WORDS))
    # Last resort: the parenthesised note stored in 'extra'
    .combine_first(_select(losses_df['extra'], EXTRA_OBLAST_WORDS))
)
losses_df['front'] = _select(first_part, FRONT_WORDS_FIRST).combine_first(
    _select(second_part, FRONT_WORDS_SECOND))
# A town is a first part that carries none of the administrative keywords
losses_df['town'] = _select(first_part, none_of=NOT_TOWN_WORDS)

# Split 'geo' ("latitude,longitude") at the first comma only. n=1 plus reindex
# guarantees exactly two columns no matter how many commas the values contain.
geo_parts = (
    losses_df['geo']
    .str.split(',', n=1, expand=True)
    .reindex(columns=[0, 1])
)

# Convert to numbers; anything unparsable becomes NaN
losses_df['lat'] = pd.to_numeric(geo_parts[0], errors='coerce')
losses_df['lon'] = pd.to_numeric(geo_parts[1], errors='coerce')

# Drop 'nearest_location', 'extra' and 'geo' and rearrange the remaining columns
losses_df = losses_df.reindex(columns=['id', 'type', 'model', 'status', 'date', 'front', 'oblast', 'raion', 'town', 'lat', 'lon', 'tags'])


In [4]:
# Debugging aid: display the rows at positions 14150-14154 to spot-check the parsed location columns
losses_df.iloc[14150:14155]

Unnamed: 0,id,type,model,status,date,front,oblast,raion,town,lat,lon,tags
14150,25886,Infantry fighting vehicles,MT-LB,Destroyed,2024-04-18,,,Volnovakha raion,Volodymyrivka,47.779417,37.417667,"Cope cage, Expanded cabin"
14151,25887,Infantry fighting vehicles,MT-LB,Destroyed,2024-04-18,,,Volnovakha raion,Volodymyrivka,47.779611,37.418556,Expanded cabin
14152,25888,Tanks,T-62M,Abandoned,2024-04-18,,,Volnovakha raion,Volodymyrivka,47.779177,37.417521,"V, Mine plow/roller, Cope cage, Damaged, ☐, Ad..."
14153,25889,Tanks,T-72B Obr. 2022,Abandoned,2024-03-26,,,Kramatorsk raion,Terny,49.091949,38.029221,"Z, O, V, Cope cage, Damaged"
14154,25890,Infantry fighting vehicles,BMP-2(K),Captured,2022-07-23,,,Beryslav raion,Ivanivka,47.475077,33.386141,Z


In [5]:
# Debugging aid: display the rows at positions 3636-3649 to spot-check the parsed location columns
losses_df.iloc[3636:3650]

Unnamed: 0,id,type,model,status,date,front,oblast,raion,town,lat,lon,tags
3636,6914,Infantry fighting vehicles,BMP-2(K),Destroyed,2022-08-06,,,Kramatorsk raion,Mazanivka,48.994053,37.346485,
3637,6915,Infantry fighting vehicles,BTR-70,Destroyed,2022-03-29,,,Mariupol raion,Mariupol,47.10162,37.65046,
3638,6916,Infantry fighting vehicles,BTR-80,Destroyed,2022-08-06,,,Izium raion,Dmytrivka,49.051442,37.114945,Underwater
3639,6919,Drones,Forpost reconnaissance UAV,Destroyed,2022-07-30,,Black Sea,,Black Sea,,,
3640,6920,Drones,'Orlan-30' reconnaissance UAV,Captured,2022-08-05,East Ukraine,,,,,,
3641,6939,Infantry fighting vehicles,BMP-2(K),Destroyed,2022-04-21,,,Nizhyn raion,Makiivka,50.66605,31.82792,
3642,6940,Self-propelled artillery,122mm 2S1 'Gvozdika',Destroyed,2022-03-19,,,Bucha raion,Berestianka,50.69697,29.99858,"Turretless, Shattered"
3643,6949,Infantry fighting vehicles,BMP-2(K),Destroyed,2022-03-31,,,Okhtyrka raion,Vovkiv,50.557402,34.967619,PL-1
3644,6953,Infantry fighting vehicles,BMP-3,Destroyed,2022-04-18,,,Mariupol raion,Mariupol,47.10224,37.63565,Turretless
3645,6954,Tanks,T-72A,Destroyed,2022-04-11,,,Sievierodonetsk raion,Borivske,48.867637,38.610248,


Load REGIONS and UNIQUE_LOCATIONS data to generate regions_dict and locations_dict

In [6]:
# Locations of the two reference tables
regions_path = 'regions.csv'
unique_locations_path = 'unique_locations.csv'

regions = pd.read_csv(regions_path)
unique_locations = pd.read_csv(unique_locations_path)


def _oblast_to_raions(front_rows):
    """Map each oblast in `front_rows` to the list of its 'raion' values."""
    raion_by_oblast = front_rows.set_index('oblast')['raion']
    return raion_by_oblast.groupby(level=0).apply(list).to_dict()


# Nested lookup: front -> oblast -> list of raions
regions_dict = dict(
    (front_name, _oblast_to_raions(front_rows))
    for front_name, front_rows in regions.groupby('front')
)

# Flat lookup: raion -> value of the 'unique' column
unique_by_raion = unique_locations.set_index('raion')['unique']
locations_dict = unique_by_raion.to_dict()

In [7]:
# Debugging helper: collect raions and towns that could not be resolved
missing_raions = []
missing_towns = []


def _norm(text):
    """Normalise a place name for case- and whitespace-insensitive comparison."""
    return text.strip().lower()


# Build the lookups once instead of re-normalising every raion/oblast list for
# every row. setdefault keeps the first entry met while walking regions_dict in
# order, which is the entry a nested front -> oblast scan would stop at.
raion_lookup = {}   # normalised raion  -> (oblast, front)
oblast_lookup = {}  # normalised oblast -> front
for region_front, oblast_dict in regions_dict.items():
    for region_oblast, raions in oblast_dict.items():
        if isinstance(region_oblast, str):
            oblast_lookup.setdefault(_norm(region_oblast), region_front)
        for region_raion in raions:
            # Non-string entries (e.g. NaN from an empty CSV cell) cannot be matched
            if isinstance(region_raion, str):
                raion_lookup.setdefault(
                    _norm(region_raion), (region_oblast, region_front))

# Normalise the per-raion 'unique' text once; non-string entries are skipped
town_listings = [
    (locations_raion, _norm(unique))
    for locations_raion, unique in locations_dict.items()
    if isinstance(unique, str)
]

# Walk the losses and populate missing raions, oblasts and fronts
for index, row in losses_df.iterrows():
    oblast = row['oblast']
    raion = row['raion']
    town = row['town']

    # Infer the raion from the town when only the town is known
    if pd.isna(raion) and pd.notna(town):
        town_key = _norm(town)
        # An empty key is a substring of every listing and would always pick
        # the first raion, so it is never looked up
        if town_key:
            # NOTE(review): this is a substring test against the whole 'unique'
            # text, so a short name also matches inside a longer one - confirm
            # against the format of unique_locations.csv
            for locations_raion, listing in town_listings:
                if town_key in listing:
                    losses_df.at[index, 'raion'] = locations_raion
                    raion = locations_raion
                    break  # first match wins

    if pd.isna(oblast):
        # Oblast unknown: derive oblast and front from the raion
        match = raion_lookup.get(_norm(raion)) if pd.notna(raion) else None
        if match is not None:
            region_oblast, region_front = match
            losses_df.at[index, 'oblast'] = region_oblast
            losses_df.at[index, 'front'] = region_front
        elif pd.notna(raion):
            # Still unmatched: remember what could not be resolved
            missing_raions.append(raion)
        elif pd.notna(town):
            missing_towns.append(town)
    else:
        # Oblast known: it determines the front (an unknown oblast leaves it as is)
        region_front = oblast_lookup.get(_norm(oblast))
        if region_front is not None:
            losses_df.at[index, 'front'] = region_front

# Debug: report unmatched raions and towns
if missing_raions:
    print("Unmatched raions:", set(missing_raions))
if missing_towns:
    print("Unmatched towns:", set(missing_towns))

# Output the updated DataFrame
print(losses_df.head())

   id   type   model     status        date         front          oblast  \
0   1  Tanks  T-64BV  Destroyed  2022-03-14  East Ukraine  Luhansk oblast   
1   2  Tanks  T-64BV  Destroyed  2022-03-16  East Ukraine  Donetsk oblast   
2   3  Tanks  T-64BV  Destroyed  2022-03-16  East Ukraine  Donetsk oblast   
3   5  Tanks  T-64BV  Destroyed  2022-03-17  East Ukraine  Donetsk oblast   
4   6  Tanks  T-64BV  Destroyed  2022-03-31  East Ukraine  Luhansk oblast   

                   raion      town        lat        lon  \
0  Sievierodonetsk raion  Rubizhne  49.027241  38.343374   
1         Mariupol raion  Mariupol  47.099126  37.523713   
2         Mariupol raion  Mariupol  47.098693  37.523532   
3         Mariupol raion  Mariupol  47.098140  37.640174   
4  Sievierodonetsk raion  Rubizhne  49.011220  38.398440   

                              tags  
0                              NaN  
1  Turretless, Z, Mine plow/roller  
2                        Shattered  
3                    Turretl

In [8]:
# Debugging aid: display the rows at positions 8870-8887 to inspect the filled-in 'front' and 'oblast' values
losses_df.iloc[8870:8888]

Unnamed: 0,id,type,model,status,date,front,oblast,raion,town,lat,lon,tags
8870,16611,Tanks,T-72B3 Obr. 2016,Destroyed,2023-02-13,South Ukraine,Crimea,,Crimea,,,"Mine plow/roller, Cope cage"
8871,16612,Tanks,Unknown tank,Destroyed,2023-05-29,East Ukraine,Donetsk oblast,Pokrovsk raion,Pervomaiske,,,
8872,16613,Transport,UAZ-452,Damaged,2023-05-29,South Ukraine,,,,,,∇
8873,16616,Transport,GAZ-66,Destroyed,2023-05-20,East Ukraine,Luhansk oblast,,,,,
8874,16617,Tanks,T-72 *,Destroyed,2023-05-21,East Ukraine,Donetsk oblast,Pokrovsk raion,Pervomaiske,48.05446,37.634177,Turretless
8875,16618,Infantry fighting vehicles,BMP-1(P),Destroyed,2023-05-21,East Ukraine,Donetsk oblast,Pokrovsk raion,Pervomaiske,48.054428,37.634295,
8876,16619,Infantry fighting vehicles,BMP-1(P),Destroyed,2023-07-04,East Ukraine,Donetsk oblast,Bakhmut raion,Bilohorivka,48.756389,38.196357,Turretless
8877,16621,Transport,KamAZ 6x6,Destroyed,2023-03-27,East Ukraine,Luhansk oblast,,,,,
8878,16622,Transport,KamAZ 6x6,Destroyed,2023-03-27,East Ukraine,Luhansk oblast,,,,,
8879,16623,Transport,KamAZ 6x6,Destroyed,2023-05-22,East Ukraine,Luhansk oblast,,,,,Shattered


In [9]:
# Select the rows whose 'front' is still missing (None or NaN)
front_is_missing = losses_df['front'].isna()
empty_rows = losses_df.loc[front_is_missing]

# Write those rows to a CSV file and echo them in the cell output
empty_rows.to_csv('townsuknown.csv', index=False, encoding='utf-8')
print(empty_rows)

          id                        type  \
127      186                       Tanks   
143      216                       Tanks   
153      231                       Tanks   
193      287                       Tanks   
223      340                       Tanks   
...      ...                         ...   
18359  32701  Infantry fighting vehicles   
18368  32712                      Drones   
18372  32718                      Drones   
18373  32719                      Drones   
18374  32721                   Transport   

                                        model     status        date front  \
127                                     T-72B   Captured  2022-04-19  None   
143                           T-72B Obr. 1989  Destroyed  2022-04-07  None   
153                           T-72B Obr. 1989   Captured  2022-04-12  None   
193                                    T-72B3  Destroyed  2022-04-11  None   
223                                    T-72B3   Captured  2022-03-16  None   
...

In [10]:
# Debugging aid: display the rows at positions 16800-16814 to inspect the filled-in 'front' and 'oblast' values
losses_df.iloc[16800:16815]

Unnamed: 0,id,type,model,status,date,front,oblast,raion,town,lat,lon,tags
16800,30134,Transport,ZiL-131,Destroyed,2024-09-19,East Ukraine,Luhansk oblast,Sievierodonetsk raion,,,,KUNG
16801,30135,Infantry fighting vehicles,BTR-82A(M),Destroyed,2024-09-24,Central Russia,Kursk oblast,Sudzha raion,Dyakovka,51.406583,35.271833,
16802,30136,Infantry fighting vehicles,MT-LB,Destroyed,2024-09-24,East Ukraine,Luhansk oblast,Svatove raion,Kovalivka,,,"Cope cage, Jammer, Additional armour"
16803,30138,Tanks,T-80U,Destroyed,2024-09-24,,,,,,,Turretless
16804,30139,Drones,ZALA Z-16 (421-16Е) reconnaissance UAV,Captured,2024-09-04,,,,,,,
16805,30140,Helicopters,Mi-8AMTSh transport helicopter,Destroyed,2024-09-23,Siberia Russia,Omsk oblast,,Omsk,,,
16806,30144,Infantry fighting vehicles,BMP-3 688A-sb6-2KP,Destroyed,2024-09-23,Central Russia,Kursk oblast,Sudzha raion,Nikolaevo-Darino,51.23851,34.95139,"Turretless, Cope cage"
16807,30145,Towed artillery,122mm 2A18 D-30 howitzer,Destroyed,2024-09-23,East Ukraine,Luhansk oblast,Sievierodonetsk raion,Kreminna,,,
16808,30146,Transport,MAZ-531605,Destroyed,2024-09-23,East Ukraine,Luhansk oblast,Svatove raion,Svatove,49.443282,38.175367,
16809,30148,Infantry fighting vehicles,MT-LB,Destroyed,2024-09-21,Central Russia,Kursk oblast,Sudzha raion,Plekhovo,,,"Cope cage, Expanded cabin"


In [11]:
# Write the frame to CSV (UTF-8, without the index column)
losses_df.to_csv('russia_losses_clean_locations.csv', index=False, encoding='utf-8')