### Import necessary libraries

In [1]:
import pandas as pd
import matplotlib.pyplot as plt

# Load the dataset
file_path = 'Nigeria_1997-2024_Sep20.csv'
df = pd.read_csv(file_path)

# Display basic information
df.info()


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 38076 entries, 0 to 38075
Data columns (total 31 columns):
 #   Column              Non-Null Count  Dtype  
---  ------              --------------  -----  
 0   event_id_cnty       38076 non-null  object 
 1   event_date          38076 non-null  object 
 2   year                38076 non-null  int64  
 3   time_precision      38076 non-null  int64  
 4   disorder_type       38076 non-null  object 
 5   event_type          38076 non-null  object 
 6   sub_event_type      38076 non-null  object 
 7   actor1              38076 non-null  object 
 8   assoc_actor_1       10199 non-null  object 
 9   inter1              38076 non-null  int64  
 10  actor2              29525 non-null  object 
 11  assoc_actor_2       8803 non-null   object 
 12  inter2              38076 non-null  int64  
 13  interaction         38076 non-null  int64  
 14  civilian_targeting  14964 non-null  object 
 15  iso                 38076 non-null  int64  
 16  regi

In [2]:
df.sample(10)

Unnamed: 0,event_id_cnty,event_date,year,time_precision,disorder_type,event_type,sub_event_type,actor1,assoc_actor_1,inter1,...,location,latitude,longitude,geo_precision,source,source_scale,notes,fatalities,tags,timestamp
18568,NIG19061,2020-12-02,2020,1,Demonstrations,Protests,Peaceful protest,Protesters (Nigeria),Students (Nigeria); Teachers (Nigeria),6,...,Benin City,6.3351,5.6275,1,GardaWorld,Other,"On 2 December 2020, the academic staff and stu...",0,crowd size=no report,1725317864
31890,NIG6390,2014-09-06,2014,1,Demonstrations,Protests,Peaceful protest,Protesters (Nigeria),PDP: People's Democratic Party,6,...,Illorin,8.5,4.55,1,Daily Independent (Nigeria),National,A group of members of Peoples Democratic Party...,0,,1702343752
32206,NIG6061,2014-06-24,2014,1,Demonstrations,Protests,Peaceful protest,Protesters (Nigeria),,6,...,Abuja,9.0833,7.5333,1,Daily Leadership (Nigeria),Subnational,"Traders, under the auspices of Apo Traders Ass...",0,,1702343754
15638,NIG21366,2021-07-16,2021,1,Demonstrations,Protests,Protest with intervention,Protesters (Nigeria),Students (Nigeria),6,...,Abeokuta,7.15,3.35,1,Sahara Reporters,Regional,"On 16 July 2021, university students from Ago-...",0,crowd size=no report,1724113595
19320,NIG18447,2020-10-09,2020,1,Political violence; Demonstrations,Protests,Excessive force against protesters,Protesters (Nigeria),#EndSARS,6,...,Abuja,9.0833,7.5333,1,Business Day (Nigeria); Daily Independent (Nig...,Other-National,"On 9 October 2020, supporters of the #EndSARS ...",0,crowd size=no report,1719274247
1858,NIG36834,2024-05-18,2024,1,Demonstrations,Protests,Peaceful protest,Protesters (Nigeria),Women (Nigeria),6,...,Okoloba,5.2883,5.837,1,Daily Leadership (Nigeria); Vanguard (Nigeria),National,"On 18 May 2024, women from Okoloba community (...",0,crowd size=no report,1719876536
33188,NIG5088,2013-10-07,2013,1,Demonstrations,Riots,Violent demonstration,Rioters (Nigeria),,5,...,Oshodi,6.5582,3.3466,1,Vanguard (Nigeria),National,A group of youths in the Makinde area of Oshod...,1,,1618566558
20722,NIG17242,2020-06-02,2020,1,Political violence,Explosions/Remote violence,Shelling/artillery/missile attack,Military Forces of Nigeria (2015-2023),,1,...,Bama,11.5221,13.6856,2,Daily Independent (Nigeria); Daily Post (Niger...,National,"On 2 June 2020, Nigerian Armed Forces conducte...",10,,1702343422
7219,NIG31314,2023-04-25,2023,1,Demonstrations,Protests,Peaceful protest,Protesters (Nigeria),,6,...,Ikorodu,6.6155,3.5087,1,Guardian (Nigeria),Subnational,"On 25 April 2023, locals of Odo Nla community ...",0,crowd size=no report,1702343161
38012,NIG66,1997-05-06,1997,1,Demonstrations,Riots,Violent demonstration,Rioters (Nigeria),,5,...,Onitsha,6.1454,6.7885,1,Guardian (United Kingdom),International,Four people have been killed in riots after an...,4,,1621962592


### Checking for Null Data

In [3]:
# print the column names that have a null value , and the number of null values they contain
null_columns=df.columns[df.isnull().any()]
for i in null_columns:
    print(i,df[i].isnull().sum())

assoc_actor_1 27877
actor2 8551
assoc_actor_2 29273
civilian_targeting 23112
admin1 1
admin2 1
admin3 38076
tags 30562


In [4]:
# find unique values in each column
for col in df.columns:
    print(col, df[col].nunique())

event_id_cnty 38076
event_date 6513
year 28
time_precision 3
disorder_type 4
event_type 6
sub_event_type 24
actor1 881
assoc_actor_1 1218
inter1 8
actor2 883
assoc_actor_2 1381
inter2 9
interaction 43
civilian_targeting 1
iso 2
region 1
country 1
admin1 37
admin2 751
admin3 0
location 5116
latitude 4935
longitude 5013
geo_precision 3
source 2824
source_scale 23
notes 36356
fatalities 134
tags 221
timestamp 2107


In [5]:

# Drop columns that are completely empty or irrelevant
df_cleaned = df.drop(columns=['admin3', 'timestamp','tags','event_id_cnty','time_precision',
                              'geo_precision','notes','source','source_scale','country','region','assoc_actor_1',
                              'assoc_actor_2','inter2','actor2','interaction','iso'])

# Convert 'event_date' to datetime
df_cleaned['event_date'] = pd.to_datetime(df_cleaned['event_date'], errors='coerce')

# For 'admin1', 'admin2', the row containging missing values will be dropped
df_cleaned.dropna(subset=['admin1', 'admin2'], inplace=True)

df_cleaned['civilian_targeting'].fillna('Unknown', inplace=True)

# Check for duplicates and remove them if found
df_cleaned.drop_duplicates(inplace=True)

# Display cleaned dataset information to verify changes
df_cleaned.info()


<class 'pandas.core.frame.DataFrame'>
Index: 37794 entries, 0 to 38075
Data columns (total 15 columns):
 #   Column              Non-Null Count  Dtype         
---  ------              --------------  -----         
 0   event_date          37794 non-null  datetime64[ns]
 1   year                37794 non-null  int64         
 2   disorder_type       37794 non-null  object        
 3   event_type          37794 non-null  object        
 4   sub_event_type      37794 non-null  object        
 5   actor1              37794 non-null  object        
 6   inter1              37794 non-null  int64         
 7   actor2              29432 non-null  object        
 8   civilian_targeting  37794 non-null  object        
 9   admin1              37794 non-null  object        
 10  admin2              37794 non-null  object        
 11  location            37794 non-null  object        
 12  latitude            37794 non-null  float64       
 13  longitude           37794 non-null  float64       


The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df_cleaned['civilian_targeting'].fillna('Unknown', inplace=True)


In [9]:
df_cleaned.isnull().sum()

event_date            0
year                  0
disorder_type         0
event_type            0
sub_event_type        0
actor1                0
inter1                0
civilian_targeting    0
admin1                0
admin2                0
location              0
latitude              0
longitude             0
fatalities            0
dtype: int64

In [10]:
# save cleaned_dataframe to a new csv file
df_cleaned.to_csv('Nigeria_1997-2024_Sep20_cleaned.csv', index=False)