# Russia-Ukraine Conflict Analysis

This notebook is a research study that analyzes data from the Ethereum network during the period of the Russia-Ukraine conflict.


## Import Statements

In [1]:
import pandas as pd
import numpy as np
from enum import Enum
import seaborn as sns

The first step is to create a class that manages the degree analysis of the 2021 Ethereum datasets and compares it with the period of the Russian invasion (February 2022).

Five datasets are required to use this class:

- Four "lookup" datasets. They contain 3-month historical data of the degree of all active accounts in each period. The first one has data from January to March, the second one has data from April to June and so forth.
- A single "lookup" dataset corresponding to the period of interest: The one with info from the beginning of the conflict, ranging from 2022 February 10th to 2022 March 10th.

These datasets need to be in the following format:

- They need to be in the same directory.
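- They need to have the same name, differing only by a suffix (e.g. "lookup_1", "lookup_2", "lookup_3", "lookup_4" and "lookup_war"). These suffixes are hardcoded for now. The files are read as `.csv`, and for in-degree or out-degree analysis an extra `_in` or `_out` is appended to the name (e.g. "lookup_1_in.csv").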

In [10]:
class DegreeType(Enum):
    '''Kind of degree to analyse; it selects which lookup files are read.'''
    IN_DEGREE = 1   # lookup files carry the "_in" suffix
    OUT_DEGREE = 2  # lookup files carry the "_out" suffix
    DEGREE = 3      # lookup files carry no extra suffix

class ETHDegreeAnalyzer:
    '''
    Class responsible for handling degree comparisons in Ethereum for different time periods.

    ----------

    #### Attributes:
    
    - datasets_name: str

            Name used for the filenames of the lookup datasets. Defaults to "lookup".

    - lookup_path: str
    
            Path for the lookup dataset files that will be used for the comparisons. It is prepended verbatim
            to the file name, so it must end with a path separator. Defaults to the current working directory.
    - degree_type: DegreeType

            Type of degree analysis to be performed. Accepts DegreeType.IN_DEGREE, DegreeType.OUT_DEGREE and DegreeType.DEGREE. 
            Defaults to DegreeType.DEGREE.

    ----------

    #### Methods:

    - load_dataset()

            Loads the dataset in the memory

    - get_descending_accounts(max_std, min_total_val, min_degree)

            Accounts with a high relative degree in 2021 compared to the period of interest.

    - get_ascending_accounts(max_std, max_total_val, min_degree)

            Accounts with a low (but non-zero) relative degree in 2021 compared to the period of interest.

    '''

    # File-name suffix appended for each supported degree type.
    _SUFFIX_BY_DEGREE_TYPE = {
        DegreeType.IN_DEGREE: '_in',
        DegreeType.OUT_DEGREE: '_out',
        DegreeType.DEGREE: '',
    }

    # Identifiers of the 2021 lookup files, merged in this order.
    _LOOKUP_IDS = ('1', '2', '3', '4')

    def __init__(
            self,
            datasets_name: str = 'lookup',
            lookup_path: str = './',
            degree_type: DegreeType = DegreeType.DEGREE
        ):

        self.datasets_name = datasets_name
        self.lookup_path = lookup_path
        self.degree_type = degree_type
        self._dataset_prepared = False
        self._df = None
        # Prefix of the columns that hold the per-file 2021 averages.
        self._2021_col_identifier = 'avgValue'

    
    def _concat_lookup(
            self,
            df: pd.DataFrame,
            df_id: str,
            suffix: str
        ) -> pd.DataFrame:
        '''
        Merges the row-wise mean of one 2021 lookup file into `df` as the column "avgValue<df_id>".

        If the file cannot be read or averaged, a warning is printed and `df` is returned unchanged
        (best effort). Accounts absent from the lookup file get 0 after the merge.
        '''
        # Built outside the try block so the warning below can always name the file.
        df_name = self.lookup_path+self.datasets_name+'_'+df_id+suffix+'.csv'
        try:
            lookup = pd.read_csv(df_name, index_col = 0)
            averages = lookup.mean(axis = 1).to_frame(name = self._2021_col_identifier+df_id)
        except Exception as e:
            print(f'Warning: {df_name} could not be read: {e}')
            return df
        return df.merge(averages, how='left', left_index=True, right_index=True).fillna(0)

    def load_dataset(self) -> None:
        '''
        Loads into memory all the datasets from the filepath specified concatenated. If any of the 2021 lookups is
        missing, it will be ignored and the function will try to concatenate the others. Other functions will call
        this one implicitly if they need the dataframe loaded.

        Raises ValueError if `degree_type` is not a supported DegreeType. Errors raised while reading the
        "_war" file itself are not caught.
        '''
        try:
            suffix = self._SUFFIX_BY_DEGREE_TYPE[self.degree_type]
        except (KeyError, TypeError):
            raise ValueError('Degree type not recognized.') from None

        loaded = pd.read_csv(self.lookup_path+self.datasets_name+'_war'+suffix+'.csv', index_col = 0)
        for df_id in self._LOOKUP_IDS:
            loaded = self._concat_lookup(loaded, df_id = df_id, suffix = suffix)

        self._df = loaded
        # A freshly loaded frame has absolute averages and no derived columns yet.
        self._dataset_prepared = False

    def _prepare_dataset(self) -> None:
        '''
        Converts the 2021 averages to values relative to the 'degree' column and adds the 'totalVal'
        (row-wise sum) and 'stdVal' (row-wise standard deviation) columns. Loads the data first if needed.
        '''
        if self._dataset_prepared:
            return  # dividing twice would corrupt the relative values
        if self._df is None:
            self.load_dataset()
        cols_2021 = [column for column in self._df.columns if column.startswith(self._2021_col_identifier)]
        for column in cols_2021:
            # absolute average value is converted to relative average value
            self._df[column] = self._df[column]/self._df['degree']
        self._df['totalVal'] = self._df[cols_2021].sum(axis = 1)
        self._df['stdVal'] = self._df[cols_2021].std(axis = 1)
        self._dataset_prepared = True

    def get_descending_accounts(self, max_std: float, min_total_val: float, min_degree: float = -1) -> pd.DataFrame:
        '''
        Constructs a dataframe with the accounts that had a high-ish degree in 2021 and decreased in the period of interest.

        ---

        #### Parameters:

        - max_std: float

            The maximum standard deviation allowed for the 2021 period (inclusive). The lower the value, the more stable
            the accounts will need to be to be selected.
        - min_total_val: float

            The minimum value (exclusive) for the summation of the relative degrees in 2021. The lower the value, the lower
            the differences between 2021 and the war are going to be.
        - min_degree (optional): float

            The lowest degree for the resulting dataframe. Can be used to filter out little activity accounts.
            The default of -1 disables this filter.

        #### Returns: 
        
        DataFrame sorted by 'degree' in descending order.
        '''
        if not self._dataset_prepared:
            self._prepare_dataset()

        frame = self._df
        selected = (
            (frame['stdVal'] > 0)
            & (frame['stdVal'] <= max_std)
            & (frame['totalVal'] > min_total_val)
        )
        result = frame[selected].sort_values(by = 'degree', ascending = False)
        return result if min_degree == -1 else result[result['degree'] >= min_degree]
    
    def get_ascending_accounts(self, max_std: float, max_total_val: float, min_degree: float = -1) -> pd.DataFrame:
        '''
        Constructs a dataframe with the accounts that had little activity (but existed!) in 2021 and increased a lot
        in the period of interest.

        ---

        #### Parameters:

        - max_std: float

            The maximum standard deviation allowed for the 2021 period (exclusive). The lower the value, the more stable
            the accounts will need to be to be selected.
        - max_total_val: float

            The maximum value (inclusive) for the summation of the relative degrees in 2021. The lower the value, the bigger
            the differences between 2021 and the war are going to be.
        - min_degree (optional): float

            The lowest degree for the resulting dataframe. Can be used to filter out little activity accounts.
            The default of -1 disables this filter.

        #### Returns: 
        
        DataFrame sorted by 'degree' in descending order.
        '''
        if not self._dataset_prepared:
            self._prepare_dataset()

        frame = self._df
        columns_2021 = [col for col in frame.columns if col.startswith(self._2021_col_identifier)]
        # "Existed in 2021" means a non-zero average in every loaded 2021 lookup.
        existed_in_2021 = (frame[columns_2021] > 0).all(axis = 1)
        # NOTE(review): max_std is exclusive here but inclusive in get_descending_accounts;
        # kept as in the original so existing results do not change.
        selected = (
            existed_in_2021
            & (frame['stdVal'] > 0)
            & (frame['stdVal'] < max_std)
            & (frame['totalVal'] <= max_total_val)
        )
        result = frame[selected].sort_values(by = 'degree', ascending = False)
        return result if min_degree == -1 else result[result['degree'] >= min_degree]

In [15]:
# Accounts that existed in 2021 with a low summed relative degree (<= 1), i.e. whose
# activity grew in the period of interest; sorted by degree, highest first.
degree_analyzer = ETHDegreeAnalyzer(lookup_path='../data/')
asc_df = degree_analyzer.get_ascending_accounts(max_std=10, max_total_val=1)
asc_df

Unnamed: 0,degree,avgValue1,avgValue2,avgValue3,totalVal,stdVal
0xfbddadd80fe7bda00b901fbaf73803f2238ae655,248829.0,0.078596,0.142731,0.297560,0.518887,0.112569
0x283af0b28c62c092c9727f1ee09c02ca627eb7f5,152168.0,0.136691,0.300589,0.515829,0.953109,0.190147
0x15d4c048f83bd7e37d49ea4c83a07267ec4203da,111325.0,0.097795,0.084388,0.268233,0.450415,0.102492
0x58b6a8a3302369daec383334672404ee733ab239,100918.0,0.109653,0.163395,0.062899,0.335948,0.050288
0xb0cf943cf94e7b6a2657d15af41c5e06c2bfea3d,87807.0,0.101507,0.115765,0.149050,0.366322,0.024398
...,...,...,...,...,...,...
0xe6d23a63e5a5c6f3f7bf4351cfdb1923a4c1045e,2.0,0.166667,0.250000,0.333333,0.750000,0.083333
0x096aa77a9f0a0408be6a405578108df31d15452f,2.0,0.166667,0.250000,0.333333,0.750000,0.083333
0x11ec84106d1b4503ce16ff95ee436487497dc9ae,2.0,0.166667,0.250000,0.500000,0.916667,0.173472
0x468452b52eb84eee68a78dbd38db191a436aece4,2.0,0.166667,0.500000,0.333333,1.000000,0.166667


In [13]:
# Accounts whose summed relative degree in 2021 exceeds 25, i.e. whose activity
# dropped in the period of interest; sorted by degree, highest first.
degree_analyzer.get_descending_accounts(max_std=10, min_total_val=25)

Unnamed: 0,degree,avgValue1,avgValue2,avgValue3,totalVal,stdVal
0x7a250d5630b4cf539739df2c5dacb4c659f2488d,323122.0,11.291720,11.785063,4.325878,27.402660,4.171446
0xa1d8d972560c2f8144af871db508f0b0b10a3fbf,12394.0,22.384057,23.095934,9.519068,54.999059,7.641400
0xd07dc4262bcdbf85190c01c996b4c06a461d2430,4425.0,12.803315,7.462147,4.836309,25.101770,4.059891
0xf6874c88757721a02f47592140905c4336dfbc61,3244.0,15.371147,19.653822,18.558364,53.583333,2.224848
0xed212a4a2e82d5ee0d62f70b5dee2f5ee0f10c5d,2880.0,6.451736,7.655556,11.032870,25.140162,2.374947
...,...,...,...,...,...,...
0xa431fba4027585b4e66fccadd70d0da1702d49c9,1.0,11.000000,12.000000,2.666667,25.666667,5.124379
0x39caf13a104ff567f71fd2a4c68c026fdb6e740b,1.0,1.000000,15.500000,10.666667,27.166667,7.383039
0x25194f5b023255db57dcaeb777fda8eddc3684ca,1.0,12.333333,9.000000,5.000000,26.333333,3.671714
0xa439083921877dea6e4001d9507455288c70b631,1.0,0.000000,16.000000,13.000000,29.000000,8.504901
