In [None]:
# Import libraries
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Load the fatalities records from the CSV file into a DataFrame.
# The path is relative, so the file is looked up in the working directory.
DATA_FILE = 'fatalities.csv'
df = pd.read_csv(DATA_FILE)

# Explore the dataset and identify the trends in fatalities over time. Identify any significant changes, spikes, or declines in the number of fatalities.

In [None]:
# Parse date_of_death as datetimes so the calendar year can be extracted
df['date_of_death'] = pd.to_datetime(df['date_of_death'])
df['year_of_death'] = df['date_of_death'].dt.year

# One row per year holding the number of records for that year
deaths_by_year = (
    df.groupby('year_of_death')
    .size()
    .reset_index(name='no_of_deaths')
)

# Line chart of the yearly totals, drawn on an explicit Axes
years = deaths_by_year['year_of_death']
totals = deaths_by_year['no_of_deaths']

fig, ax = plt.subplots(figsize=(10, 6))
ax.plot(years, totals, marker='o', linestyle='-')
ax.set_title('Number of Fatalities Over the Years')
ax.set_xlabel('Year of Death')
ax.set_ylabel('Number of Fatalities')
ax.grid(True)
plt.show()




### This graph shows the trend in the number of fatalities over time. The highest number of deaths occurred in 2014. In most cases, a year with fewer deaths is followed by a year in which the number of deaths rises again.

# Conduct an analysis by examining the age, gender, and citizenship of the individuals killed. Determine if there are any notable patterns or disparities in the data.

In [None]:

# Drop rows where age or gender is missing so the demographic plots that
# follow only see complete values.
# NOTE(review): this mutates df in place, so every later cell that reads df
# (including the location, injury and ammunition counts) works on the reduced
# frame and leaves these rows out of its totals — confirm this is intended.
df.dropna(subset=['age', 'gender'], inplace=True)


In [None]:
# Histogram of victim ages, split into 20 equal-width bins
fig, ax = plt.subplots(figsize=(10, 5))
ax.hist(df['age'], bins=20, edgecolor='black', alpha=0.5)
ax.set_title('Distribution of Age')
ax.set_xlabel('Age')
ax.set_ylabel('Total Fatalities')
plt.show()

### This graph shows that the most common age among the individuals killed is around 20.

In [None]:

# Number of fatalities per gender, most frequent first
gender_counts = df['gender'].value_counts()

# Bar chart with one bar per gender value
fig, ax = plt.subplots(figsize=(10, 5))
ax.bar(gender_counts.index, gender_counts, color=['skyblue', 'lightgreen'])
ax.set_title('Gender Distribution')
ax.set_xlabel('Gender')
ax.set_ylabel('Total Fatalities')
plt.show()

### This graph shows that the number of males killed is about three times the number of females killed.

In [None]:

# Number of fatalities per citizenship, most frequent first
citizenship_counts = df['citizenship'].value_counts()

# Bar chart with one bar per citizenship
fig, ax = plt.subplots(figsize=(10, 5))
ax.bar(citizenship_counts.index, citizenship_counts, color='salmon')
ax.set_title('Citizenship Distribution')
ax.set_xlabel('Citizenship')
ax.set_ylabel('Total Fatalities')
# Tilt the category labels so they do not overlap
plt.xticks(rotation=45, ha='right')
plt.show()


### This graph shows that most of the individuals killed were Palestinian citizens.

In [None]:

# Box plot comparing the age distribution of each citizenship group
citizenship_labels = df['citizenship'].unique()

# Collect one age series per citizenship, in the same order as the labels
citizenship_data = []
for label in citizenship_labels:
    in_group = df['citizenship'] == label
    citizenship_data.append(df.loc[in_group, 'age'])

fig, ax = plt.subplots(figsize=(10, 5))
ax.boxplot(citizenship_data, labels=citizenship_labels)
ax.set_title('Age Distribution by Citizenship')
ax.set_xlabel('Citizenship')
ax.set_ylabel('Age')
plt.xticks(rotation=45, ha='right')
plt.show()


### This plot shows that most Palestinian victims were between 20 and 30 years old, most Israeli victims were between 20 and 45, and most Jordanian victims were between 25 and 35.

###

# Visualize the distribution of fatalities and identify areas that have experienced higher levels of violence.

In [None]:
# Tally the records per event_location_district, most frequent first
district_tally = df['event_location_district'].value_counts()

# Split the tally into parallel arrays of district names and their counts
location_names = district_tally.index.to_numpy()
location_counts = district_tally.to_numpy()

# Bar chart of fatalities per district
fig, ax = plt.subplots(figsize=(10, 5))
ax.bar(location_names, location_counts, color='skyblue')
ax.set_title('Fatalities by Location (event_location_district)')
ax.set_xlabel('Location (event_location_district)')
ax.set_ylabel('Number of Fatalities')
plt.xticks(rotation=45, ha='right')
plt.show()


### This graph shows that Gaza is the event-location district with the most fatalities.

In [None]:

# Mean age of the victims for each event_location_district
age_by_location = df.groupby('event_location_district', as_index=False)['age'].mean()

# Bar chart with one bar per district showing that mean age
district_names = age_by_location['event_location_district']
district_mean_age = age_by_location['age']

fig, ax = plt.subplots(figsize=(10, 5))
ax.bar(district_names, district_mean_age, color='skyblue')
ax.set_title('Average Age by Location (event_location_district)')
ax.set_xlabel('Location (event_location_district)')
ax.set_ylabel('Average Age')
plt.xticks(rotation=45, ha='right')
plt.show()


### This graph shows the average age of the individuals killed in each district.

In [None]:
# Tally the records per event_location_region, most frequent first
region_tally = df['event_location_region'].value_counts()

# Split the tally into parallel arrays of region names and their counts
location_names = region_tally.index.to_numpy()
location_counts = region_tally.to_numpy()

# Bar chart of fatalities per region
fig, ax = plt.subplots(figsize=(10, 5))
ax.bar(location_names, location_counts, color='skyblue')
ax.set_title('Fatalities by Location (event_location_region)')
ax.set_xlabel('Location (event_location_region)')
ax.set_ylabel('Number of Fatalities')
plt.xticks(rotation=45, ha='right')
plt.show()


### This graph shows the number of fatalities by event location region. The Gaza Strip is the region where the most individuals were killed.

In [None]:

# Mean age of the victims for each event_location_region
age_by_region = df.groupby('event_location_region', as_index=False)['age'].mean()

# Bar chart with one bar per region showing that mean age
region_names = age_by_region['event_location_region']
region_mean_age = age_by_region['age']

fig, ax = plt.subplots(figsize=(10, 5))
ax.bar(region_names, region_mean_age, color='skyblue')
ax.set_title('Average Age by Location (event_location_region)')
ax.set_xlabel('Location (event_location_region)')
ax.set_ylabel('Average Age')
plt.xticks(rotation=45, ha='right')
plt.show()


### This graph shows that the average age of the individuals killed is 27 in the Gaza Strip, 37 in Israel, and 28 in the West Bank.

# Examine the types of injuries inflicted on individuals. Identify the most common types of injuries and assess their severity.

In [None]:
# Tally the records per type_of_injury, most frequent first
injury_tally = df['type_of_injury'].value_counts()

# Split the tally into parallel arrays of injury types and their counts
types = injury_tally.index.to_numpy()
counts = injury_tally.to_numpy()

# Bar chart of fatalities per injury type
fig, ax = plt.subplots(figsize=(10, 5))
ax.bar(types, counts, color='skyblue')
ax.set_title('Most Common Types of Injuries')
ax.set_xlabel('Types of Injuries')
ax.set_ylabel('Total Fatalities')
plt.xticks(rotation=45, ha='right')
plt.show()


### This graph shows that gunfire is the most common type of injury.

In [None]:

# Count the records for every (type_of_injury, killed_by) pair
pair_sizes = df.groupby(['type_of_injury', 'killed_by']).size()

# Pivot killed_by into columns; pairs that never occur are counted as 0
injury_killed_by_counts = pair_sizes.unstack(fill_value=0)

# Grouped bar chart: one cluster per injury type, one bar per killed_by value
ax = injury_killed_by_counts.plot.bar(figsize=(10, 5))
ax.set_title('Types of Injuries and How Victims Were Killed')
ax.set_xlabel('Types of Injuries')
ax.set_ylabel('Total Fatalities')
plt.xticks(rotation=45, ha='right')
ax.legend(title='Killed By')
plt.show()

### This graph shows that Israeli forces most often killed individuals by gunfire.

# Analyze the ammunition and means by which the individuals were killed. Determine the most frequently used weapons or methods and evaluate their impact.

In [None]:
# Fill missing ammunition values with the most frequent category (the mode).
# The result is assigned back to the column instead of calling
# fillna(inplace=True) on df['ammunition']: that chained in-place form acts on
# an intermediate object, triggers a FutureWarning in pandas 2.x and leaves df
# unchanged once Copy-on-Write is enabled.
# NOTE(review): mode imputation adds every missing row to the category that is
# already the largest, so it widens that category's lead in the ranking below.
mode_ammunition = df['ammunition'].mode()[0]
df['ammunition'] = df['ammunition'].fillna(mode_ammunition)

# Number of records per ammunition type, largest first
ammunition_impact = df.groupby('ammunition').size().sort_values(ascending=False)

# Bar chart of the ranked ammunition counts
ammunition_impact.plot(kind='bar', figsize=(10, 5), color='skyblue')
plt.title('Most Frequently Used Means of Killing')
plt.xlabel('Means of Killing')
plt.ylabel('Total Fatalities')
plt.xticks(rotation=45, ha='right')
plt.show()


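### This graph shows that missiles are the most frequently used ammunition. Note that missing ammunition values were filled with the most frequent value before counting, which adds to the total of the leading category.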

In [None]:
# Make sure date_of_death holds datetimes, then take the calendar year
df['date_of_death'] = pd.to_datetime(df['date_of_death'])
death_year = df['date_of_death'].dt.year

# Rows are ammunition types, columns are years; missing pairs count as 0
ammunition_time_counts = (
    df.groupby(['ammunition', death_year])
    .size()
    .unstack(fill_value=0)
)

# Transpose so years run along the x-axis, with one line per ammunition type
ax = ammunition_time_counts.T.plot.line(figsize=(10, 5), colormap='coolwarm')
ax.set_title('Trend of Means of Killing Over Time')
ax.set_xlabel('Year')
ax.set_ylabel('Total Fatalities')
plt.show()


### This graph shows the yearly trend in the ammunition used to kill individuals. In 2014, missiles were used at a high rate, and 2,000 individuals were killed by missiles.

# Create profiles of the victims based on the available data such as age, gender, citizenship, and place of residence. Identify common characteristics among the victims.

In [None]:

# Victim profile: age distribution, ignoring any missing ages
known_ages = df['age'].dropna()

fig, ax = plt.subplots(figsize=(10, 5))
ax.hist(known_ages, bins=20, color='skyblue', edgecolor='black', alpha=0.5)
ax.set_title('Distribution of Age of Victims')
ax.set_xlabel('Age')
ax.set_ylabel('Total Fatalities')
plt.show()



### This graph shows that individuals around the age of 20 were killed most often.

In [None]:
# Victim profile: number of fatalities per gender
gender_counts = df['gender'].value_counts()

fig, ax = plt.subplots(figsize=(10, 5))
ax.bar(gender_counts.index, gender_counts.values, color=['skyblue', 'lightgreen'])
ax.set_title('Distribution of Gender Among Victims')
ax.set_xlabel('Gender')
ax.set_ylabel('Total Fatalities')
# Keep the gender labels horizontal
plt.xticks(rotation=0)
plt.show()

### This graph shows that far more males than females were killed. The number of males killed is above 9,500.

In [None]:
# Victim profile: the ten most frequent citizenships
all_citizenship_counts = df['citizenship'].value_counts()
citizenship_counts = all_citizenship_counts.iloc[:10]  # keep only the top 10

fig, ax = plt.subplots(figsize=(10, 5))
ax.bar(citizenship_counts.index, citizenship_counts.values, color='lightgreen')
ax.set_title('Top 10 Citizenship Among Victims')
ax.set_xlabel('Citizenship')
ax.set_ylabel('Total Fatalities')
plt.xticks(rotation=45, ha='right')
plt.show()


### This graph shows that Palestinians account for the most fatalities, compared with Israeli, Jordanian, and American citizens. The number of Palestinians killed is about 10,000.

In [None]:
# Victim profile: the ten most frequent places of residence
all_residence_counts = df['place_of_residence'].value_counts()
residence_counts = all_residence_counts.iloc[:10]  # keep only the top 10

fig, ax = plt.subplots(figsize=(10, 5))
ax.bar(residence_counts.index, residence_counts.values, color='green')
ax.set_title('Top 10 Places of Residence Among Victims')
ax.set_xlabel('Place of Residence')
ax.set_ylabel('Total Fatalities')
plt.xticks(rotation=45, ha='right')
plt.show()

### This graph shows that Gaza City is the place of residence with the most victims.