## Importing libraries

In [None]:
# Import libraries
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Load the fatalities dataset; the relative path is resolved against the
# current working directory.
df = pd.read_csv('fatalities.csv')


# Q1: Explore the dataset and identify the trends in fatalities over time. Identify any significant changes, spikes, or declines in the number of fatalities.

## Trend of Fatalities Over the Years

In [None]:
# Parse the death dates so the .dt accessor can be used on the column
df['date_of_death'] = pd.to_datetime(df['date_of_death'])

# Derive calendar year and month columns from the parsed dates
death_dates = df['date_of_death']
df['year'] = death_dates.dt.year
df['month'] = death_dates.dt.month

# Number of fatalities per year
fatalities_by_year = (
    df.groupby('year')
    .size()
    .reset_index(name='no_of_fatalities')
)

# Number of fatalities per (year, month) pair
fatalities_by_month = (
    df.groupby(['year', 'month'])
    .size()
    .reset_index(name='no_of_fatalities_monthly')
)

# Yearly trend line
plt.figure(figsize=(10, 5))
sns.lineplot(data=fatalities_by_year, x='year', y='no_of_fatalities', marker='o')
plt.xlabel('Year')
plt.ylabel('Number of Fatalities')
plt.title('Trend of Fatalities Over the Years')

# Label every point with its count, placed 10 points above the marker
for _, yearly in fatalities_by_year.iterrows():
    count = yearly['no_of_fatalities']
    plt.annotate(
        f'{count}',
        (yearly['year'], count),
        xytext=(0, 10),
        textcoords='offset points',
        ha='center',
        color='black',
        fontsize=8,
    )

plt.show()



#### This graph shows the trend in the number of fatalities over the years. The highest number of fatalities occurred in 2014, with 2326 deaths. The number of deaths in the years 2000-2015 is much higher compared to 2015 onwards.

In [None]:
# Flag a year as a spike when fatalities rose by more than 50% compared with
# the previous year (the first year has no predecessor, so it is never flagged)
fatalities_by_year['spike'] = fatalities_by_year['no_of_fatalities'].pct_change() > 0.5
spike_years = fatalities_by_year[fatalities_by_year['spike']]

# Plot the trend of fatalities over the years.
# The series is drawn as ONE continuous line: mapping `spike` to `hue` would
# split it into two separate lines and join non-adjacent spike years together.
plt.figure(figsize=(10, 5))
sns.lineplot(x='year', y='no_of_fatalities', data=fatalities_by_year,
             marker='o', color='grey', label='No Spike')

# Overlay the spike years as red markers on top of the line
plt.scatter(spike_years['year'], spike_years['no_of_fatalities'],
            color='red', zorder=3, label='Spike')

plt.title('Trend of Fatalities Over the Years with Spikes Highlighted')
plt.xlabel('Year')
plt.ylabel('Number of Fatalities')
plt.legend(title='Legend', loc='upper left')
plt.show()

#### The spike in the graph indicates a trend from the year 2000 to 2015, if a particular year experienced a lower number of fatalities, the subsequent year tended to increase in fatalities. However, a noticeable shift occurred from 2015 onwards. During this period, the pattern changed slightly. If a year recorded a lower number of fatalities, it no longer consistently resulted in an increase in fatalities the following year.

## Trend of Fatalities Over the Months

In [None]:
# Monthly fatality counts, with one line per year (year is mapped to hue)
plt.figure(figsize=(10, 5))
sns.lineplot(
    data=fatalities_by_month,
    x='month',
    y='no_of_fatalities_monthly',
    hue='year',
    marker='o',
)
plt.xlabel('Month')
plt.ylabel('Number of Fatalities')
plt.title('Trend of Fatalities Over the Months')
plt.legend(loc='upper right', title='Year')
plt.show()

#### The graph illustrates the trend in the number of fatalities over the months across years. The peak in fatalities occurred during the period from 2012 to 2016, specifically within the months of June to September. This timeframe witnessed the highest number of fatalities. Conversely, the period from 2016 to 2020 exhibited the lowest number of fatalities.

# Q2: Conduct an analysis by examining the age, gender, and citizenship of the individuals killed. Determine if there are any notable patterns or disparities in the data.

In [None]:
# Drop rows with a missing age or gender.
# NOTE(review): this mutates `df` in place, so every later cell (including the
# location, injury and ammunition analyses) only sees rows that have both
# fields — confirm that this is intended.
df.dropna(subset=['age', 'gender'], inplace=True)

## Age Distribution of Individuals Killed

In [None]:
# Histogram of victim ages with a KDE curve overlaid
ages = df['age']
plt.figure(figsize=(10, 5))
sns.histplot(ages, color='skyblue', kde=True)
plt.xlabel('Age')
plt.ylabel('Number of Fatalities')
plt.title('Age Distribution of Individuals Killed')
plt.show()


#### The graph indicates an occurrence of fatalities among individuals around the age of 20. This pattern suggests a potential focus on targeting the younger generation, as this age group appears to be affected.

## Gender Distribution of Individuals Killed

In [None]:
# Pie chart of the share of fatalities per gender (whole-percent labels)
gender_share = df['gender'].value_counts()
plt.figure(figsize=(10, 5))
gender_share.plot.pie(colors=['lightblue', 'lightcoral'], autopct='%1.0f%%')
plt.ylabel('')  # remove the automatic y-axis label
plt.title('Gender Distribution of Individuals Killed')
plt.show()

#### The graph illustrates that approximately 87% of the individuals killed are male. This suggests a focus on targeting men, as the majority of the victims fall within this gender category.

## Citizenship Distribution of Individuals Killed 

In [None]:
# Fatalities per citizenship, bars ordered from most to least frequent
citizenship_counts = df['citizenship'].value_counts()

plt.figure(figsize=(10, 5))
sns.countplot(
    data=df,
    x='citizenship',
    order=citizenship_counts.index,
    palette='viridis',
)
plt.title('Citizenship Distribution of Individuals Killed')
plt.xlabel('Citizenship')
plt.ylabel('Number of Fatalities')
plt.xticks(rotation=45, ha='right')

# Write each bar's count just above it (same order as the bars)
for bar_pos, total in enumerate(citizenship_counts):
    plt.text(bar_pos, total + 5, str(total), fontsize=8, ha='center', va='bottom')

plt.show()


#### The graph shows the citizenship distribution of individuals killed in the conflict. The majority of fatalities were Palestinians, followed by Israelis, Jordanians, and Americans. This graph highlights the disproportionate impact of the conflict on Palestinians, who make up about half of the population in the affected area. It also shows that the conflict is not limited to Palestinians and Israelis, but also affects people of other nationalities.

## Age Distribution by Citizenship

In [None]:
# Box plot of age per citizenship, boxes ordered by number of fatalities
citizenship_order = df['citizenship'].value_counts().index

plt.figure(figsize=(10, 5))
sns.boxplot(data=df, x='citizenship', y='age', order=citizenship_order, palette='Set3')
plt.xlabel('Citizenship')
plt.ylabel('Age')
plt.title('Age Distribution by Citizenship')
plt.xticks(ha='right', rotation=45)
plt.show()

#### This graph shows the age distribution by citizenship for Palestinian, Israeli, Jordanian, and American individuals killed in the conflict. This shows that median age is around 25 years old for all four citizenship groups. Palestinians have a longer tail on the right-hand side, indicating more older individuals killed. Israelis have a longer tail on the left-hand side, indicating more younger individuals killed. Americans have the longest interquartile range, indicating a wider age distribution.

## Age Distribution by Gender

In [None]:
# Box plot comparing the age spread of each gender
plt.figure(figsize=(10, 5))
sns.boxplot(data=df, x='gender', y='age', palette='Set2')
plt.xlabel('Gender')
plt.ylabel('Age')
plt.title('Age Distribution by Gender')
plt.show()


#### This box and whisker plot shows the distribution of age by gender. The median age is slightly higher for males than for females. There are also more outliers on the right-hand side of the male boxplot. The male population is slightly older and more spread out in terms of age than the female population.

## Changes in Age Distribution Over Time

In [None]:
# Age by year from the row-level data; seaborn aggregates the repeated
# observations for each year into a single line
plt.figure(figsize=(10, 5))
sns.lineplot(data=df, x='year', y='age', marker='o', label='Age')
plt.xlabel('Year')
plt.ylabel('Average Age')
plt.title('Changes in Age Distribution Over Time')
plt.legend()
plt.show()



##### The graph shows the distribution of ages over the years. Specifically, in the year 2014, the highest number of individuals killed were around the age of 23. The overall trend across the years indicates that most fatalities lie within the age range of 24 to 28. This observation suggests that the primary focus was on targeting the younger generation throughout the years.

## Changes in Citizenship Distribution Over Time

In [None]:
# Yearly fatality counts per citizenship, one line per citizenship
citizenship_by_year = (
    df.groupby(['year', 'citizenship'])
    .size()
    .reset_index(name='count')
)

plt.figure(figsize=(10, 5))
sns.lineplot(data=citizenship_by_year, x='year', y='count', hue='citizenship', marker='o')
plt.xlabel('Year')
plt.ylabel('Number of Individuals')
plt.title('Changes in Citizenship Distribution Over Time')
# Anchor the legend outside the plotting area, to the right
plt.legend(title='Citizenship', loc='upper left', bbox_to_anchor=(1.05, 1))
plt.show()


#### The graph illustrates the distribution of citizens over the years. In the year 2014, the majority of individuals killed were citizens of Palestine.  Then Israelis accounted for a higher number of fatalities, surpassing Jordanians and Americans. The highest number of Israeli citizens killed occurred in the year 2002.

## Changes in Age Distribution and Number of Individuals Over Time

In [None]:
# Group by year and calculate the average age and the number of individuals
# ('count' counts the non-null ages in each year)
age_and_count_by_year = df.groupby('year')['age'].agg(['mean', 'count']).reset_index()

# Plot average age (left axis) and number of individuals killed (right axis)
fig, ax1 = plt.subplots(figsize=(10, 5))

# Line plot for average age. legend=False stops seaborn from drawing a
# separate legend on this axis; a combined legend is built below.
sns.lineplot(x='year', y='mean', data=age_and_count_by_year, label='Average Age',
             color='blue', marker='o', legend=False, ax=ax1)
ax1.set_xlabel('Year')
ax1.set_ylabel('Average Age', color='blue')
ax1.tick_params(axis='y', labelcolor='blue')

# Create a second y-axis to plot the count of individuals
ax2 = ax1.twinx()
sns.lineplot(x='year', y='count', data=age_and_count_by_year, label='Number of Individuals',
             marker='s', color='red', legend=False, ax=ax2)
ax2.set_ylabel('Number of Individuals', color='red')
ax2.tick_params(axis='y', labelcolor='red')

# Single legend covering both axes, so the two entries cannot overlap
age_handles, age_labels = ax1.get_legend_handles_labels()
count_handles, count_labels = ax2.get_legend_handles_labels()
ax2.legend(age_handles + count_handles, age_labels + count_labels, loc='upper right')

# Overall title
plt.title('Changes in Age Distribution and Number of Individuals Over Time')
fig.tight_layout()
plt.show()


####  The graph illustrates a trend in the distribution of ages over the years. Between 2000 and 2015, the average age of individuals killed was higher compared to the years after 2015. After 2015, their average age tended to be lower as compared to previous ones.

## Changes in Age Distribution Over Time by Citizenship

In [None]:

# Mean age and number of individuals for every (year, citizenship) pair
age_and_count_by_year_citizenship = (
    df.groupby(['year', 'citizenship'])['age']
    .agg(['mean', 'count'])
    .reset_index()
)

# Average age over time, one line per citizenship
plt.figure(figsize=(10, 5))
sns.lineplot(
    data=age_and_count_by_year_citizenship,
    x='year',
    y='mean',
    hue='citizenship',
    marker='o',
    palette='viridis',
)
plt.title('Changes in Age Distribution Over Time by Citizenship')
plt.xlabel('Year')
plt.ylabel('Average Age')
# Anchor the legend outside the plotting area, to the right
plt.legend(title='Citizenship', loc='upper left', bbox_to_anchor=(1.05, 1))

plt.show()

#### The attached graph shows that the average age of Israeli citizens killed has been increasing over time, while the average age of Palestinian citizens killed has been decreasing. This suggests that the Israelis killed were of a higher average age, while the Palestinians killed were of a lower age over time.

# Q3: Visualize the distribution of fatalities and identify areas that have experienced higher levels of violence.

## Distribution of Fatalities by Event Location District

In [None]:
# Fatalities per event location district, most affected district first
district_counts = df['event_location_district'].value_counts()

plt.figure(figsize=(10, 5))
sns.countplot(
    data=df,
    x='event_location_district',
    order=district_counts.index,
    palette='viridis',
)

# Write each bar's count on top of it (same order as the bars)
for bar_pos, total in enumerate(district_counts):
    plt.text(bar_pos, total, str(total), fontsize=8, ha='center', va='bottom')

plt.xlabel('Event Location District')
plt.ylabel('Number of Fatalities')
plt.title('Distribution of Fatalities by Event Location District')
plt.xticks(ha='right', rotation=45)
plt.show()


#### This graph shows that most individuals killed are from Gaza reaching 2417. 

## Distribution of Fatalities by Event Location Region

In [None]:
# Fatalities per event location region, most affected region first
region_counts = df['event_location_region'].value_counts()

plt.figure(figsize=(10, 5))
sns.countplot(
    data=df,
    x='event_location_region',
    order=region_counts.index,
    palette='viridis',
)

# Write each bar's count on top of it (same order as the bars)
for bar_pos, total in enumerate(region_counts):
    plt.text(bar_pos, total, str(total), fontsize=8, ha='center', va='bottom')

plt.xlabel('Event Location Region')
plt.ylabel('Number of Fatalities')
plt.title('Distribution of Fatalities by Event Location Region')
plt.xticks(ha='right', rotation=45)
plt.show()


#### This graph shows number of individuals killed are most in Gaza strip reaching 7664.

## Distribution of Age by Event Location District

In [None]:

# Bar per district showing age, with standard-deviation error bars;
# districts are ordered by their number of fatalities
district_order = df['event_location_district'].value_counts().index

plt.figure(figsize=(10, 5))
sns.barplot(
    data=df,
    x='event_location_district',
    y='age',
    order=district_order,
    errorbar='sd',
    palette='viridis',
)
plt.xlabel('Event Location District')
plt.ylabel('Age')
plt.title('Distribution of Age by Event Location District')
plt.xticks(ha='right', rotation=45)
plt.show()


#### This graph shows the analysis of event location district by age. It shows that individuals killed in Israel have the highest average age among all.

## Distribution of Age by Event Location Region

In [None]:

# Bar plot of age by event location region, with standard-deviation error
# bars; regions are ordered by their number of fatalities
plt.figure(figsize=(10, 5))
sns.barplot(x='event_location_region', y='age', data=df, palette='viridis', errorbar="sd", order=df['event_location_region'].value_counts().index)
# The title names the region, matching the plotted column and the x-axis label
plt.title('Distribution of Age by Event Location Region')
plt.xlabel('Event Location Region')
plt.ylabel('Age')
plt.xticks(rotation=45, ha='right')
plt.show()



#### The graph indicates that on average, individuals killed in Gaza Strip were 27 years old, those killed in Israel were 37 years old, and those killed in the West Bank were 28 years old. This suggests that the average age of individuals killed in Israel was higher compared to Gaza Strip and the West Bank.

## Distribution of Fatalities by Gender in Event Location Districts

In [None]:
# Bar plot to show the count of fatalities by gender in each event location district
plt.figure(figsize=(10, 5))
ax = sns.countplot(x='event_location_district', hue='gender', data=df, palette='viridis', order=df['event_location_district'].value_counts().index)

# Add annotations for number of fatalities.
# Patches without a positive height are skipped: a district/gender pair with
# no rows can yield a NaN-height bar (int(NaN) raises ValueError), and some
# seaborn versions add zero-size patches that would be labelled "0".
for bar in ax.patches:
    height = bar.get_height()
    if not height > 0:
        continue
    ax.annotate(f'{int(height)}', (bar.get_x() + bar.get_width() / 2., height),
                ha='center', va='center', fontsize=8, color='black', xytext=(0, 5),
                textcoords='offset points')

plt.title('Distribution of Fatalities by Gender in Event Location Districts')
plt.xlabel('Event Location District')
plt.ylabel('Number of Fatalities')
plt.xticks(rotation=45, ha='right')
plt.legend(title='Gender')
plt.show()


#### This graph shows the distribution of fatalities by gender across event location districts. It shows that the ratio of males to females killed is highest in Gaza.

## Distribution of Fatalities by Citizenship in Event Location Regions

In [None]:
# Bar plot to show the count of fatalities by citizenship in each event location region
plt.figure(figsize=(10, 5))
ax = sns.countplot(x='event_location_region', hue='citizenship', data=df, palette='viridis', order=df['event_location_region'].value_counts().index)

# Add annotations for number of fatalities.
# Patches without a positive height are skipped: a region/citizenship pair
# with no rows can yield a NaN-height bar (int(NaN) raises ValueError), and
# some seaborn versions add zero-size patches that would be labelled "0".
for bar in ax.patches:
    height = bar.get_height()
    if not height > 0:
        continue
    ax.annotate(f'{int(height)}', (bar.get_x() + bar.get_width() / 2., height),
                ha='center', va='center', fontsize=8, color='black', xytext=(0, 5),
                textcoords='offset points')

plt.title('Distribution of Fatalities by Citizenship in Event Location Regions')
plt.xlabel('Event Location Region')
plt.ylabel('Number of Fatalities')
plt.xticks(rotation=45, ha='right')
plt.legend(title='Citizenship')
plt.show()


#### This graph shows the distribution of fatalities by citizenship in each event location region. It shows that in the Gaza Strip and the West Bank, mostly Palestinians were killed.

## Count of Fatalities by Event Location Region for Each Year

In [None]:
# Number of fatalities for every (year, event location region) pair
violence_by_year_region = (
    df.groupby(['year', 'event_location_region'])
    .size()
    .reset_index(name='count')
)

# Grouped bars: one cluster per year, one bar per region
plt.figure(figsize=(16, 8))
sns.barplot(
    data=violence_by_year_region,
    x='year',
    y='count',
    hue='event_location_region',
    palette='viridis',
)
plt.xlabel('Year')
plt.ylabel('Number of Fatalities')
plt.title('Count of Fatalities by Event Location Region for Each Year')
# Anchor the legend outside the plotting area, to the right
plt.legend(title='Event Location Region', loc='upper left', bbox_to_anchor=(1.05, 1))
plt.show()

#### This graph shows the count of fatalities by region over the years. In 2014, most individuals killed were from Gaza.

## Count of Fatalities by Event Location District for Each Year

In [None]:
# Number of fatalities for every (year, event location district) pair
violence_by_year_district = (
    df.groupby(['year', 'event_location_district'])
    .size()
    .reset_index(name='count')
)

# Grouped bars: one cluster per year, one bar per district
plt.figure(figsize=(16, 8))
sns.barplot(
    data=violence_by_year_district,
    x='year',
    y='count',
    hue='event_location_district',
    palette='viridis',
)
plt.xlabel('Year')
plt.ylabel('Number of Fatalities')
plt.title('Count of Fatalities by Event Location District for Each Year')
# Anchor the legend outside the plotting area, to the right
plt.legend(title='Event Location District', loc='upper left', bbox_to_anchor=(1.05, 1))
plt.show()

## Distribution of Gender by Event Location Region

In [None]:
# Countplot to show the distribution of gender by event location region
# (aggregated over all years; the data is not split by year here)
plt.figure(figsize=(10, 5))
ax = sns.countplot(x='event_location_region', hue='gender', data=df, palette='viridis')

# Add annotations for number of fatalities.
# Patches without a positive height are skipped: a region/gender pair with no
# rows can yield a NaN-height bar (int(NaN) raises ValueError), and some
# seaborn versions add zero-size patches that would be labelled "0".
for bar in ax.patches:
    height = bar.get_height()
    if not height > 0:
        continue
    ax.annotate(f'{int(height)}', (bar.get_x() + bar.get_width() / 2., height),
                ha='center', va='center', fontsize=8, color='black', xytext=(0, 5),
                textcoords='offset points')

# The title no longer claims a per-year breakdown that the plot does not show
plt.title('Distribution of Gender by Event Location Region')
plt.xlabel('Event Location Region')
plt.ylabel('Number of Fatalities')
plt.xticks(rotation=45, ha='right')
plt.legend(title='Gender', bbox_to_anchor=(1.05, 1), loc='upper left')
plt.show()


#### This graph shows that in the Gaza Strip, mostly males were killed, reaching 6690. Overall, across regions, far more males were killed than females.

## Trend of Average Age of Individuals Killed by Event Location Region and Year

In [None]:
# Mean age for every (event location region, year) pair
avg_age_by_region_year = (
    df.groupby(['event_location_region', 'year'])['age']
    .mean()
    .reset_index()
)

# Average age over time, one line per region
plt.figure(figsize=(10, 5))
sns.lineplot(
    data=avg_age_by_region_year,
    x='year',
    y='age',
    hue='event_location_region',
    marker='o',
    palette='viridis',
)
plt.xlabel('Year')
plt.ylabel('Average Age')
plt.title('Trend of Average Age of Individuals Killed by Event Location Region and Year')
# Anchor the legend outside the plotting area, to the right
plt.legend(title='Event Location Region', loc='upper left', bbox_to_anchor=(1.05, 1))
plt.show()

#### This graph shows that the average age of those killed in the Gaza Strip is lower than in Israel. This indicates that younger individuals were killed more often in the Gaza Strip compared to Israel.

# Q4: Examine the types of injuries inflicted on individuals. Identify the most common types of injuries and assess their severity.

## Distribution of Types of Injuries

In [None]:
# Frequency of each injury type, most common first
injury_counts = df['type_of_injury'].value_counts()

# One bar per injury type
plt.figure(figsize=(10, 5))
injury_types = injury_counts.index
sns.barplot(x=injury_types, y=injury_counts.values, palette='viridis')

# Write each bar's count on top of it
for bar_pos, occurrences in enumerate(injury_counts):
    plt.text(bar_pos, occurrences, str(occurrences), fontsize=8, ha='center', va='bottom')

plt.xlabel('Type of Injury')
plt.ylabel('Number of Fatalities')
plt.title('Distribution of Types of Injuries')
plt.xticks(ha='right', rotation=45)
plt.show()


#### This graph shows most type of injuries occur through gunfire.

## Relationship Between Type of Injury and Entities Responsible for Deaths

In [None]:
# Group by type_of_injury and killed_by, then count the number of individuals killed.
# `df` is used directly: groupby drops rows whose group keys are missing by
# default, so no separately filtered frame is needed. Pairs that never occur
# are filled with 0 when unstacking.
heatmap_data = df.groupby(['type_of_injury', 'killed_by']).size().unstack(fill_value=0)

# Plotting the heatmap (rows: injury type, columns: responsible entity)
plt.figure(figsize=(10, 5))
sns.heatmap(heatmap_data, cmap='viridis', annot=True, fmt='g', linewidths=.5, cbar_kws={'label': 'Number of Individuals Killed'})
plt.title('Relationship Between Type of Injury and Entities Responsible for Deaths')
plt.xlabel('Entities Responsible for Deaths')
plt.ylabel('Type of Injury')
plt.tight_layout()
plt.show()


#### This graph shows israeli security forces killed individuals most by gunfire reaching 9223.

# Q5: Analyze the ammunition and means by which the individuals were killed. Determine the most frequently used weapons or methods and evaluate their impact.

## Distribution of Types of Ammunition

In [None]:
# Replace missing ammunition entries with the most frequent value.
# NOTE(review): this imputation adds every missing row to the most common
# category, which inflates that category's count in the chart below.
most_common_ammunition = df['ammunition'].mode()[0]
df['ammunition'] = df['ammunition'].fillna(most_common_ammunition)

# Frequency of each ammunition type, most common first
ammunition_counts = df['ammunition'].value_counts()

# Horizontal bars: count on the x-axis, ammunition type on the y-axis
plt.figure(figsize=(10, 5))
sns.barplot(y=ammunition_counts.index, x=ammunition_counts.values, palette='viridis')

# Write each bar's count at the end of the bar
for bar_pos, occurrences in enumerate(ammunition_counts.values):
    plt.text(occurrences, bar_pos, str(occurrences), fontsize=8, ha='left', va='center')

plt.xlabel('Count')
plt.ylabel('Type of Ammunition')
plt.title('Distribution of Types of Ammunition')
plt.tight_layout()
plt.show()


### This graph shows missile is most used for killing individuals.

## Relationship Between Type of Ammunition and Entities Responsible for Deaths

In [None]:

# Group by ammunition and killed_by, then count the number of individuals killed.
# `df` is used directly: groupby drops rows whose group keys are missing by
# default, so no separately filtered frame is needed. Pairs that never occur
# are filled with 0 when unstacking.
heatmap_data = df.groupby(['ammunition', 'killed_by']).size().unstack(fill_value=0)

# Plotting the heatmap (rows: ammunition type, columns: responsible entity)

plt.figure(figsize=(10, 5))
sns.heatmap(heatmap_data, cmap='viridis', annot=True, fmt='g', linewidths=.5, cbar_kws={'label': 'Number of Individuals Killed'})
plt.title('Relationship Between Type of Ammunition and Entities Responsible for Deaths')
plt.xlabel('Entities Responsible for Deaths')
plt.ylabel('Type of Ammunition')
plt.tight_layout()
plt.show()

#### This graph shows that Israeli forces used missiles and live ammunition most often to kill individuals.

# Q6: Create profiles of the victims based on the available data such as age, gender, citizenship, and place of residence. Identify common characteristics among the victims.

## Age Distribution of Victims

In [None]:
# Histogram of the non-missing victim ages in 30 bins, with a KDE curve
victim_ages = df['age'].dropna()

plt.figure(figsize=(10, 5))
sns.histplot(victim_ages, color='skyblue', kde=True, bins=30)
plt.xlabel('Age')
plt.ylabel('Count')
plt.title('Age Distribution of Victims')
plt.show()


#### This graph shows individuals with age 20 are killed most.

## Gender Distribution of Victims

In [None]:
# Number of victims per gender, drawn as bars with a custom two-colour palette
gender_counts = df['gender'].value_counts()

plt.figure(figsize=(10, 5))
sns.barplot(
    x=gender_counts.index,
    y=gender_counts.values,
    palette=['skyblue', 'lightgreen'],
)

# Write each bar's count on top of it
for bar_pos, n_victims in enumerate(gender_counts.values):
    plt.text(bar_pos, n_victims, str(n_victims), color='black', fontsize=8, ha='center', va='bottom')

plt.xlabel('Gender')
plt.ylabel('Number of Fatalities')
plt.title('Gender Distribution of Victims')
plt.show()


#### This graph shows mostly males are killed in contrast with females. Number of males killed reaching 9601.

## Top 5 Citizenship Distribution of Victims

In [None]:
# The five most frequent citizenships, drawn as horizontal bars
top_citizenships = df['citizenship'].value_counts().head(5)

plt.figure(figsize=(10, 5))
sns.barplot(y=top_citizenships.index, x=top_citizenships.values, palette='Set3')

# Write each bar's count at the end of the bar
for bar_pos, n_victims in enumerate(top_citizenships.values):
    plt.text(n_victims, bar_pos, str(n_victims), fontsize=8, ha='left', va='center')

plt.xlabel('Number of Fatalities')
plt.ylabel('Citizenship')
plt.title('Top 5 Citizenship Distribution of Victims')
plt.show()


#### This graph shows that Palestinians were killed the most, in contrast with Israelis, Jordanians, and Americans. The number of Palestinians killed is 10,000.

## Top 5 Place of Residence Distribution of Victims

In [None]:
# The five most frequent places of residence, drawn as single-colour bars
top_residences = df['place_of_residence'].value_counts().head(5)

plt.figure(figsize=(10, 5))
sns.barplot(x=top_residences.index, y=top_residences.values, color='lightgreen')

# Write each bar's count on top of it
for bar_pos, n_victims in enumerate(top_residences.values):
    plt.text(bar_pos, n_victims, str(n_victims), color='black', fontsize=8, ha='center', va='bottom')

plt.xlabel('Place of Residence')
plt.ylabel('Number of Fatalities')
plt.title('Top 5 Place of Residence Distribution of Victims')
plt.show()


#### This graph shows victims living in Gaza city suffered most.