In [1]:
import datetime
import pandas as pd


In [2]:
'''Ukrainian refugees (cumulative) by date and destination'''

# Forward slashes resolve on Windows as well as on POSIX systems; the original
# backslash-separated path only resolved on Windows.
df_refugees = pd.read_csv('../data/Refugees.csv')


# Set datetime column as index column:
def index_by_datetime(dataframe, name='date'):
    '''Turn the column called `name` into a chronologically sorted datetime index, in place.

    Requires a dataframe with a datetime-like column as a positional argument;
    an alternative name for that column may be given as a key argument.

    The dataframe is modified in place and nothing is returned. If the
    dataframe has no column called `name`, it is left untouched.
    '''
    if name not in dataframe.columns:
        return
    # Parse first and sort afterwards: sorting the raw strings orders them
    # lexicographically, which is only chronological for ISO-like formats.
    dataframe[name] = pd.to_datetime(dataframe[name], exact=False)
    # mergesort is stable, so rows sharing a date keep their relative order.
    dataframe.sort_values(name, inplace=True, kind='mergesort')
    dataframe.set_index(name, inplace=True)


# turn values in cumulative columns to absolute values:
def decumulate_columns(dataframe, excluded=None):
    '''Replace cumulative columns with per-row increments, in place.

    Requires a dataframe as a positional argument; a list of columns to leave
    untouched may be given as a key argument. Every other column must hold
    numeric values.

    Each value becomes the difference with the value of the previous row; the
    first row keeps its own value. Rows are processed by position, so the
    dataframe does not need a 0..n-1 index. Nothing is returned.
    '''
    if excluded is None:
        excluded = []
    for column in dataframe.columns:
        if column in excluded:
            continue
        cumulative = dataframe[column]
        # Subtract the previous row from each row in one vectorised step.
        # fill_value=0 leaves the first row unchanged and avoids the NaN that
        # would otherwise turn integer columns into floats.
        dataframe[column] = cumulative - cumulative.shift(1, fill_value=0)


# NOTE: originally flagged as "not used outside the test", but the cells below call it.
# Change columns to numeric:
def change_to_numeric(dataframe, columns):
    '''Convert the listed columns of the dataframe to a numeric dtype, in place.

    Requires a dataframe and a list of column names as positional arguments.
    Nothing is returned.
    '''
    for label in columns:
        converted = pd.to_numeric(dataframe[label])
        dataframe[label] = converted


# Index labels of the rows whose destination is left out of the analysis.
condition = df_refugees.index[
    df_refugees['country'].isin(['Romania', 'Russian Federation', 'Other European countries'])
]
df_refugees.drop(condition, inplace=True)

# Both helpers modify df_refugees in place: 'date' becomes the sorted datetime
# index and 'individuals' is converted to a numeric dtype.
index_by_datetime(df_refugees)
change_to_numeric(df_refugees, ['individuals'])

# Keep only the columns used downstream.
df_refugees = df_refugees.loc[:, ['country', 'individuals']]

df_refugees


Unnamed: 0_level_0,country,individuals
date,Unnamed: 1_level_1,Unnamed: 2_level_1
2022-03-01,Belarus,341
2022-03-01,Poland,453982
2022-03-01,Slovakia,67000
2022-03-01,Republic of Moldova,79315
2022-03-01,Hungary,116348
...,...,...
2022-09-10,Republic of Moldova,606707
2022-09-11,Belarus,16705
2022-09-13,Hungary,1381738
2022-09-13,Slovakia,778335


In [13]:
'''Price of antibiotics in each market in Ukraine by date + longitude east (degrees) column to compare the proximity to the eastern front'''

# Forward slashes resolve on Windows as well as on POSIX systems; the original
# backslash-separated path only resolved on Windows.
df_prices = pd.read_csv('../data/Prices.csv', low_memory=False)

# Drop the row labelled 0, i.e. the first row read from the CSV.
# NOTE(review): presumably a tag/metadata row rather than a price record - confirm against the file.
df_prices = df_prices.drop(index=0)

# Longitude east (degrees) assigned to each market name.
# NOTE (original author): this lookup is redundant, the dataframe already has a longitude column.
longitude_east = {'Kherson': 32.6169,
                'Dnipropetrovsk': 35.0462,
                'Vinnytsya': 28.4682,
                'Zhytomyr': 28.6587,
                'Poltava': 34.5514,
                'Mykolayiv': 31.9946,
                'Kyiv city': 30.5234,
                'Iv.-Frankivsk': 24.7111,
                'Volyn': 25.3254,
                'Zaporizhzhya': 35.1396,
                'Khmelnytskiy': 26.9871,
                'Chernivtsi': 25.9358,
                'Kirovohrad': 32.2623,
                'Lviv': 24.0297,
                'Zakarpattya': 22.2879,
                'Sumy': 34.7981,
                'Ternopyl': 25.5948,
                'Odesa': 30.7233,
                'Cherkasy': 32.0598,
                'Kyiv': 30.5234,
                'Rivne': 26.2516,
                'Chernihiv': 31.2893,
                'Kharkiv': 36.2304,
                'Donetsk': 37.8028
            }
# One vectorised lookup instead of one .loc assignment per market; markets
# missing from the dictionary get NaN, as they did with the loop.
df_prices['longitude_east (degrees)'] = df_prices['market'].map(longitude_east)

change_to_numeric(df_prices, ['usdprice', 'longitude_east (degrees)'])

# Index by date, then keep only the records from the start of the war onwards.
index_by_datetime(df_prices)
start_war = datetime.datetime(2022, 2, 24, 0, 0, 0)
df_prices = df_prices[df_prices.index >= start_war]

df_prices = df_prices[['usdprice', 'market', 'commodity', 'longitude_east (degrees)']]

# Keep commodities whose name contains 'imported' or 'local'. na=False treats a
# missing commodity as "no match" instead of raising when the mask is applied.
df_prices = df_prices[df_prices['commodity'].str.contains('imported|local', na=False)]


df_prices['market'].value_counts()


UAH          70730
#currency        1
Name: currency, dtype: int64

In [4]:
'''Russian personnel losses by date'''

# Forward slashes resolve on Windows as well as on POSIX systems; the original
# backslash-separated path only resolved on Windows.
df_personnel = pd.read_csv('../data/Russia_losses_personnel(date).csv')

# Take an explicit copy: the helpers below modify the frame in place, and doing
# that on a plain column selection can trigger pandas' chained-assignment warning.
df_personnel = df_personnel[['date', 'personnel']].copy()

# Order matters: decumulate_columns runs before the date column becomes the index.
change_to_numeric(df_personnel, ['personnel'])
decumulate_columns(df_personnel, excluded=['date'])
index_by_datetime(df_personnel)

# NOTE(review): presumably dropped because it is the first row, whose value
# decumulate_columns leaves as the running total - confirm.
df_personnel.drop(index='2022-02-25', inplace=True)

df_personnel


Unnamed: 0_level_0,personnel
date,Unnamed: 1_level_1
2022-02-26,1500
2022-02-27,200
2022-02-28,800
2022-03-01,410
2022-03-02,130
...,...
2022-11-05,600
2022-11-06,490
2022-11-07,530
2022-11-08,710


In [8]:
# Russian equipment losses, by broad categories, by date:

# Forward slashes resolve on Windows as well as on POSIX systems; the original
# backslash-separated path only resolved on Windows.
df_equipment = pd.read_csv('../data/Russia_losses_equipment(date).csv')

# Discard every column that has at least one missing value, then the 'day' column.
df_equipment.dropna(axis=1, inplace=True)
df_equipment.drop(columns=['day'], inplace=True)

# Order matters: decumulate_columns runs before the date column becomes the index.
decumulate_columns(df_equipment, excluded=['date'])
index_by_datetime(df_equipment)

df_equipment['total losses'] = df_equipment[['aircraft', 
                                        'helicopter',
                                        'tank',
                                        'APC',
                                        'field artillery',
                                        'MRL',
                                        'drone',
                                        'naval ship',
                                        'anti-aircraft warfare'
                                        ]].sum(axis=1)

# The original loop called pd.to_numeric() on each column and discarded the
# result, so nothing was converted; the shared helper stores the converted columns.
change_to_numeric(df_equipment, list(df_equipment.columns))

# NOTE(review): presumably dropped because it is the first row, whose values
# decumulate_columns leaves as running totals - confirm.
df_equipment.drop(index='2022-02-25', inplace=True)

df_equipment


205

In [6]:
# Russian equipment losses (includes model and manufacturer), by date:

# Forward slashes resolve on Windows as well as on POSIX systems; the original
# backslash-separated path only resolved on Windows.
df_tech = pd.read_csv('../data/Russia_losses_equipment(tech_details).csv')

# Fill missing values with 0.0 in every column except 'sub_model'.
# The filled column is assigned back instead of calling fillna(inplace=True) on
# the selected column: that chained in-place form is deprecated in recent
# pandas and has no effect on the dataframe under Copy-on-Write.
for column in df_tech:
    if column != 'sub_model':
        df_tech[column] = df_tech[column].fillna(0.0)

df_tech['total captured'] = df_tech[['captured', 
                                    'captured and destroyed',
                                    'captured and stripped',
                                    'damaged and captured'
                                    ]].sum(axis=1)
df_tech['total not captured'] = df_tech[['abandoned', 
                                        'abandoned and destroyed',
                                        'damaged',
                                        'damaged and abandoned',
                                        'damaged beyond economical repair',
                                        'damaged by Bayraktar TB2',
                                        'destroyed',
                                        'destroyed by Bayraktar TB2',
                                        'destroyed by Bayraktar TB2 and Harpoon AShM',
                                        'sunk'
                                        ]].sum(axis=1)

df_tech['sub_model'] = df_tech['sub_model'].fillna("")

# Merge the sub-model into the model name.
# NOTE: rows without a sub_model end up with a trailing space in 'model'.
df_tech['model'] = df_tech['model'] + ' ' + df_tech['sub_model']

df_tech.drop(columns='sub_model', inplace=True)

df_tech


Unnamed: 0,equipment,model,manufacturer,losses_total,abandoned,abandoned and destroyed,captured,captured and destroyed,captured and stripped,damaged,damaged and abandoned,damaged and captured,damaged beyond economical repair,damaged by Bayraktar TB2,destroyed,destroyed by Bayraktar TB2,destroyed by Bayraktar TB2 and Harpoon AShM,sunk,total captured,total not captured
0,Tanks,T-62M,the Soviet Union,20,1.0,0.0,14.0,0.0,0.0,0.0,0.0,2.0,0.0,0.0,3.0,0.0,0.0,0.0,16.0,4.0
1,Tanks,T-62MV,the Soviet Union,3,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,2.0,1.0
2,Tanks,T-64A,the Soviet Union,2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,0.0,0.0,0.0,0.0,2.0
3,Tanks,T-64BV,the Soviet Union,39,2.0,0.0,4.0,0.0,0.0,2.0,0.0,1.0,0.0,0.0,30.0,0.0,0.0,0.0,5.0,34.0
4,Tanks,T-72A,the Soviet Union,33,1.0,0.0,15.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,16.0,0.0,0.0,0.0,15.0,18.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
293,"Trucks, Vehicles and Jeeps",UAZ-23632-148-64 armed pickup truck,Russia,2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,0.0,0.0,0.0,0.0,2.0
294,"Trucks, Vehicles and Jeeps",UAZ-394511 ‘Esaul’,Russia,5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,0.0,0.0,3.0,0.0,0.0,0.0,2.0,3.0
295,"Trucks, Vehicles and Jeeps",UAZ-515195 'Esaul',Russia,1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0
296,"Trucks, Vehicles and Jeeps",(Unknown) truck,Russia,296,1.0,0.0,12.0,1.0,0.0,7.0,2.0,0.0,0.0,0.0,265.0,8.0,0.0,0.0,13.0,283.0


In [7]:
# Ukrainian equipment losses (includes model and manufacturer), by date:

# Forward slashes resolve on Windows as well as on POSIX systems; the original
# backslash-separated path only resolved on Windows.
df_uk_tech = pd.read_csv('../data/Ukraine_losses_equipment(tech_details).csv')

# Fill missing values with 0.0 in every column except 'sub_model'.
# The filled column is assigned back instead of calling fillna(inplace=True) on
# the selected column: that chained in-place form is deprecated in recent
# pandas and has no effect on the dataframe under Copy-on-Write.
for column in df_uk_tech:
    if column != 'sub_model':
        df_uk_tech[column] = df_uk_tech[column].fillna(0.0)

df_uk_tech['total captured'] = df_uk_tech[['captured', 
                                            'captured and destroyed',
                                            'damaged and captured',
                                            'damaged by Orion and captured',
                                            'sunk but raised by Russia'
                                            ]].sum(axis=1)
df_uk_tech['total not captured'] = df_uk_tech[['abandoned', 
                                                'abandoned and destroyed',
                                                'damaged',
                                                'damaged and abandoned',
                                                'damaged beyond economical repair',
                                                'damaged by Forpost-R',
                                                'destroyed',
                                                'destroyed by Forpost-R',
                                                'destroyed by Orion',
                                                'destroyed by loitering munition',
                                                'scuttled to prevent capture by Russia',
                                                'sunk'
                                                ]].sum(axis=1)

df_uk_tech['sub_model'] = df_uk_tech['sub_model'].fillna("")

# Merge the sub-model into the model name.
# NOTE: rows without a sub_model end up with a trailing space in 'model'.
df_uk_tech['model'] = df_uk_tech['model'] + ' ' + df_uk_tech['sub_model']

df_uk_tech.drop(columns='sub_model', inplace=True)

df_uk_tech


Unnamed: 0,equipment,model,manufacturer,losses_total,abandoned,abandoned and destroyed,captured,captured and destroyed,damaged,damaged and abandoned,...,damaged by Orion and captured,destroyed,destroyed by Forpost-R,destroyed by Orion,destroyed by loitering munition,scuttled to prevent capture by Russia,sunk,sunk but raised by Russia,total captured,total not captured
0,Tanks,T-64A,the Soviet Union,1,0.0,0.0,1.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0
1,Tanks,T-64B,the Soviet Union,1,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
2,Tanks,T-64BV,the Soviet Union,123,3.0,0.0,41.0,8.0,3.0,1.0,...,0.0,63.0,0.0,0.0,0.0,0.0,0.0,0.0,53.0,70.0
3,Tanks,T-64BV Zr. 2017,Ukraine,49,3.0,0.0,27.0,0.0,1.0,1.0,...,0.0,17.0,0.0,0.0,0.0,0.0,0.0,0.0,27.0,22.0
4,Tanks,T-64B1M,Ukraine,4,0.0,0.0,4.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,4.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
224,"Trucks, Vehicles and Jeeps",Ford Ranger,the United States,1,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0
225,"Trucks, Vehicles and Jeeps",Peugeot P4,France,1,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
226,"Trucks, Vehicles and Jeeps",Unknown truck,Ukraine,52,0.0,0.0,1.0,0.0,0.0,1.0,...,0.0,46.0,0.0,2.0,0.0,0.0,0.0,0.0,1.0,51.0
227,"Trucks, Vehicles and Jeeps",Unknown truck with ZU-23 AA gun,Ukraine,1,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
