In [1]:
import pandas as pd
import numpy as np
import plotly.express as px
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split

In [2]:
dataset = pd.read_csv('terrorismData.csv')

In [3]:
dataset = dataset.drop(['Summary','Target']  , axis=1)

In [4]:
dataset.rename({'Target_type':'Targettype' , 'Weapon_type':'Weapontype' }, axis=1 , inplace=True)

In [5]:
dataset['Targettype'] = dataset['Targettype'].replace(to_replace="Unknown" , value=np.nan)
dataset['Targettype'].fillna(dataset['Targettype'].mode()[0], inplace=True)

In [6]:
dataset['Weapontype'] = dataset['Weapontype'].replace(to_replace=["Unknown","Sabotage Equipment" , "Vehicle (not to include vehicle-borne explosives, i.e., car or truck bombs)" , "Fake Weapons" , "Radiological" , "Biological" , "Other"] , value=np.nan)
dataset['AttackType'] = dataset['AttackType'].replace(to_replace="Unknown" , value=np.nan)

In [7]:
mode_attacktype = dataset.pivot_table(values='AttackType' , columns='Targettype' , aggfunc=(lambda x : x.mode()[0]))
mode_weapontype = dataset.pivot_table(values='Weapontype' , columns='Targettype' , aggfunc=(lambda x : x.mode()[0]))
missing_Weapontype=dataset['Weapontype'].isnull()
missing_Attacktype=dataset['AttackType'].isnull()

In [8]:
dataset.loc[missing_Weapontype , 'Weapontype'] = dataset.loc[missing_Weapontype , 'Targettype'].apply(lambda x : mode_weapontype[x])
dataset.loc[missing_Attacktype , 'AttackType'] = dataset.loc[missing_Attacktype , 'Targettype'].apply(lambda x : mode_attacktype[x])

In [9]:
mean_killed = dataset.pivot_table(values='Killed' , columns='AttackType' , aggfunc=(lambda x : x.mean()[0]))
mean_killed = mean_killed.round(decimals=0)
mean_wounded = dataset.pivot_table(values='Wounded' , columns='AttackType' , aggfunc=(lambda x : x.mean()[0]))
mean_wounded = mean_wounded.round(decimals=0)

  mean_killed = dataset.pivot_table(values='Killed' , columns='AttackType' , aggfunc=(lambda x : x.mean()[0]))
  mean_wounded = dataset.pivot_table(values='Wounded' , columns='AttackType' , aggfunc=(lambda x : x.mean()[0]))


In [10]:
missing_attack = dataset['Killed'].isnull()
missing_wounded = dataset['Wounded'].isnull()

In [11]:
dataset.loc[missing_attack , 'Killed'] = dataset.loc[missing_attack , 'AttackType'].apply(lambda x : mean_killed[x])
dataset.loc[missing_wounded , 'Wounded'] = dataset.loc[missing_wounded , 'AttackType'].apply(lambda x : mean_wounded[x])

In [12]:
mode_state = dataset.pivot_table(values='State' , columns='Country' , aggfunc=(lambda x: x.mode()[0]))
missing_state = dataset['State'].isnull()
dataset.loc[missing_state , 'State'] = dataset.loc[missing_state , 'Country'].apply(lambda x : mode_state[x])

In [13]:
dataset['City'] = dataset['City'].replace(to_replace=["Unknown","unknown"] , value=np.nan)
missing_city = dataset['City'].isnull()
mode_city = dataset.pivot_table(values='City' , columns='State' , aggfunc=(lambda x:x.mode()))
dataset.loc[missing_city , 'City'] = dataset.loc[missing_city , 'State'].apply(lambda x: mode_city[x])

In [14]:
mode_latitude = dataset.pivot_table(values='Latitude' , columns='State' , aggfunc=(lambda x:x.mode()))
missing_latitude=dataset['Latitude'].isnull()
dataset.loc[missing_latitude , 'Latitude'] = dataset.loc[missing_latitude , 'State'].apply(lambda x : mode_latitude[x])

In [15]:
mode_longitude = dataset.pivot_table(values='Longitude' , columns='State' , aggfunc=(lambda x:x.mode()))
missing_longitude=dataset['Longitude'].isnull()
dataset.loc[missing_longitude , 'Longitude'] = dataset.loc[missing_longitude , 'State'].apply(lambda x : mode_longitude[x])

In [16]:
indexyear = dataset[(dataset['Year']<=2009)].index
dataset.drop(indexyear , inplace=True)

In [17]:
unknowngroup = dataset[dataset['Group'] == 'Unknown'].index
dataset.drop(unknowngroup , inplace=True)

In [18]:
year1 = dataset.Year.value_counts()
years = pd.DataFrame({'year':year1.index , 'Number of attacks':year1.values})
years.sort_values(by = 'year' , inplace=True)

In [19]:
fg1 = px.line(years , x= 'year' , y = 'Number of attacks')
fg1.show()

In [20]:
grp = dataset.Group.value_counts()
grpdf = pd.DataFrame({'Group':grp.index , 'count':grp.values})
grpdf.shape


(1010, 2)

In [21]:
grpdf.drop(grpdf.index[15:] , axis=0 , inplace=True)
fg2 = px.histogram(grpdf , y = 'Group' , x = 'count' , color='Group',title='Terrorist Groups with Highest Terror Attacks')
fg2.show()

In [22]:
region1 = dataset.Region.value_counts()
regiondf = pd.DataFrame({'region':region1.index , 'count':region1.values})
fg3 = px.histogram(regiondf , x='region' , y='count' , color='region',title="Terrorist Attacks By Region")
fg3.show()

In [23]:
city1 = dataset.City.value_counts()
city = pd.DataFrame({'city':city1.index , 'count':city1.values})
city.drop(city.index[15:] , axis = 0 , inplace=True)
fig4 = px.histogram(city , x = 'city' , y = 'count' , color='city' , title='Most Attack Cities')
fig4.show()

In [24]:
attacktype1 = dataset.AttackType.value_counts()
attacktp = pd.DataFrame({'Attacktype':attacktype1.index , 'count':attacktype1.values})
fg5 = px.pie(attacktp , values='count' , names='Attacktype',title='Terrorist Attack Types')
fg5.show()

In [25]:
targettype1 = dataset.Targettype.value_counts()
ttdf = pd.DataFrame({'target type':targettype1.index , 'count':targettype1.values})
fig6 = px.histogram(ttdf , x = 'target type' , y = 'count' , color='target type' , title = 'Terrorist Target')
fig6.show()

In [26]:
weapontype1 = dataset.Weapontype.value_counts()
weapontp = pd.DataFrame({'weapontype':weapontype1.index , 'count':weapontype1.values})
fg7 = px.pie(weapontp , values='count' , names='weapontype',title='Terrorist Weapon Types')
fg7.show()

In [27]:
dataset3 = dataset
dataset3 = dataset3.drop(['Year' , 'Month' ,'Day','State','Region','City','Latitude','Longitude','Country'],axis=1)

In [28]:
encoder = LabelEncoder()
dataset3['AttackType'] = encoder.fit_transform(dataset3['AttackType'])
dataset3['Group'] = encoder.fit_transform(dataset3['Group'])
dataset3['Targettype'] = encoder.fit_transform(dataset3['Targettype'])
dataset3['Weapontype'] = encoder.fit_transform(dataset3['Weapontype'])

In [29]:
x = dataset3.drop(['Group'] , axis = 1)
y = dataset3['Group']

In [30]:
x_train , x_test , y_train , y_test = train_test_split(x , y , test_size=0.2 , random_state=1)

In [31]:
acc_list = []
model_list = ["DecisionTreeClassifier" ,"RandomForestClassifier","KNeighborsClassifier"]

In [32]:
from sklearn.tree import DecisionTreeClassifier
clf1 = DecisionTreeClassifier()
model1 = clf1.fit(x_train , y_train)
acc_list.append(model1.score(x_test , y_test))

In [33]:
from sklearn.ensemble import RandomForestClassifier
clf2 = RandomForestClassifier(max_depth=4 , random_state=0)
model2 = clf2.fit(x_train , y_train)
acc_list.append(model2.score(x_test , y_test))

In [34]:
from sklearn.neighbors import KNeighborsClassifier 
clf3 = KNeighborsClassifier(n_neighbors=5 , metric='minkowski' , p=2)
model3 = clf3.fit(x_train , y_train)
acc_list.append(model3.score(x_test , y_test))

In [35]:
acc_df = pd.DataFrame(np.c_[model_list , acc_list] , columns=["Model","Accuracy"])

In [36]:
acc_df.head()

Unnamed: 0,Model,Accuracy
0,DecisionTreeClassifier,0.2464041499646309
1,RandomForestClassifier,0.21881631690639
2,KNeighborsClassifier,0.1957085593020514
