In [1]:
import pandas as pd

In [2]:
# Load the RAND incident table from a CSV file in the working directory.
# The file is decoded as Latin-1 -- presumably because it is not valid UTF-8;
# TODO confirm against the data source.
df_RAND = pd.read_csv("RAND_Database_of_Worldwide_Terrorism_Incidents.csv", encoding='latin-1')

In [3]:
!pip install spacy



In [5]:

import spacy

# Load spaCy's small English pipeline. extract_info() below relies on its
# dependency parse (token.head / token.subtree).
# NOTE(review): no visible cell downloads "en_core_web_sm" -- presumably it was
# installed out of band (e.g. `python -m spacy download en_core_web_sm`); confirm.
nlp = spacy.load("en_core_web_sm")

# Function to extract relevant information from descriptions
def extract_info(description):
    """Flag mentions of female terrorists and female victims in a description.

    The text is parsed with the module-level ``nlp`` pipeline. For every token
    whose lowercase text is "woman", "women" or "female", the words in the
    subtree of that token's syntactic head are inspected: if "terrorist" is
    among them the terrorist flag is set, otherwise if "victim" is among them
    the victim flag is set.

    Args:
        description: Free-text incident description. Anything that is not a
            ``str`` (e.g. NaN for a missing value) is treated as "no match".

    Returns:
        A ``(is_female_terrorist, is_female_victim)`` tuple of booleans.
    """
    if not isinstance(description, str):
        return False, False

    female_terms = ("woman", "women", "female")
    found_terrorist = False
    found_victim = False

    for tok in nlp(description):
        if tok.text.lower() not in female_terms:
            continue

        # Lowercased words governed by the same head as the female term.
        nearby_words = [word.text.lower() for word in tok.head.subtree]
        if "terrorist" in nearby_words:
            found_terrorist = True
        elif "victim" in nearby_words:
            # Only reached when "terrorist" is absent from this subtree.
            found_victim = True

    return found_terrorist, found_victim

# Run the extractor over every description, then split the resulting
# (terrorist, victim) pairs into two flag columns on the DataFrame
flag_pairs = df_RAND['Description'].apply(extract_info)
terrorist_flags, victim_flags = zip(*flag_pairs)
df_RAND['IsFemaleTerrorist'] = terrorist_flags
df_RAND['IsFemaleVictim'] = victim_flags

# Incidents in which a woman was identified as a terrorist
female_terrorists = df_RAND.loc[df_RAND['IsFemaleTerrorist']]

# Incidents in which a woman was identified as a victim
female_victims = df_RAND.loc[df_RAND['IsFemaleVictim']]

# Print both subsets, each under its own heading
print("Incidents with female terrorists:", female_terrorists, sep="\n")
print("\nIncidents with female victims:", female_victims, sep="\n")


Incidents with female terrorists:
            Date        City                      Country  \
1793   13-Oct-77         NaN                        Spain   
3309   08-Mar-83        Bonn  Federal Republic of Germany   
6892   06-Jul-93   Jerusalem                       Israel   
17732  05-Dec-03  Yessentuki                       Russia   
19694  14-Oct-04       Najaf                         Iraq   

                                             Perpetrator  \
1793   Popular Front for the Liberation of Palestine ...   
3309                                               Other   
6892                                               Other   
17732                                       Black Widows   
19694                                            Unknown   

                       Weapon  Injuries  Fatalities  \
1793                  Unknown         0           1   
3309               Explosives         0           0   
6892   Knives & sharp objects         1           0   
17732             

In [6]:
# Show the table of incidents flagged as involving a female terrorist
female_terrorists

Unnamed: 0,Date,City,Country,Perpetrator,Weapon,Injuries,Fatalities,Description,IsFemaleTerrorist,IsFemaleVictim
1793,13-Oct-77,,Spain,Popular Front for the Liberation of Palestine ...,Unknown,0,1,SPAIN. A Lufthansa 737 Boeing jet was hijacke...,True,False
3309,08-Mar-83,Bonn,Federal Republic of Germany,Other,Explosives,0,0,FEDERAL REPUBLIC OF GERMANY. No one was injur...,True,False
6892,06-Jul-93,Jerusalem,Israel,Other,Knives & sharp objects,1,0,ISRAEL. A suspected Palestinian terrorist sta...,True,False
17732,05-Dec-03,Yessentuki,Russia,Black Widows,Explosives,165,46,A suicide bomber killed at least forty-six peo...,True,False
19694,14-Oct-04,Najaf,Iraq,Unknown,Explosives,0,0,Iraqi police arrested a woman wearing an explo...,True,False


## PART 2

In [1]:
# Keywords and phrases treated as signals of an economic motive. The cells
# below match them against incident descriptions as plain substrings / regex
# alternatives, so some entries are redundant (e.g. "inflation rate" is already
# covered by "inflation").
econ_words = [
    "economy",
    "market",
    "inflation",
    "supply and demand",
    "GDP",
    "monetary policy",
    "fiscal policy",
    "trade",
    "investment",
    "commerce",
    "capitalism",
    "taxation",
    "unemployment",
    "consumer",
    "entrepreneur",
    "macroeconomics",
    "microeconomics",
    "demand curve",
    "supply curve",
    "inflation rate",
    "interest rates",
    "budget deficit",
    "exchange rate",
    "economic growth",
    "globalization",
    "business cycle",
    "monopoly",
    "oligopoly",
    "economic inequality",
    "commodity",
]

# Keywords and phrases treated as signals of a political motive. They are
# matched as substrings, so "policy" also fires on the economic phrases
# "monetary policy" / "fiscal policy", and entries such as "foreign policy" or
# "political ideology" are already covered by "policy" / "ideology".
pol_words = [
    "politics",
    "government",
    "democracy",
    "election",
    "policy",
    "voting",
    "law",
    "legislation",
    "congress",
    "president",
    "parliament",
    "ideology",
    "partisanship",
    "lobbying",
    "constitution",
    "policy-making",
    "political party",
    "bureaucracy",
    "separation of powers",
    "international relations",
    "diplomacy",
    "human rights",
    "civil rights",
    "voter turnout",
    "political ideology",
    "gerrymandering",
    "public opinion",
    "political science",
    "lobbyist",
    "foreign policy",
]

# Keywords treated as signals of a religious motive. They are matched as
# substrings, so "worshipper", "clergyman" and "spiritualism" are already
# covered by "worship", "clergy" and "spiritual".
rel_words = [
    "religion",
    "faith",
    "spirituality",
    "church",
    "temple",
    "mosque",
    "belief",
    "worship",
    "god",
    "prayer",
    "theology",
    "sacred",
    "doctrine",
    "spiritual",
    "clergy",
    "worshipper",
    "clergyman",
    "revelation",
    "scripture",
    "denomination",
    "pilgrimage",
    "ritual",
    "divine",
    "prophet",
    "afterlife",
    "paganism",
    "spiritualism",
    "synagogue",
    "blessing",
    "sacrifice",
]

# These keyword lists drive the keyword filters and classify_motive() in the cells below.


In [None]:
import re


def filter_by_keywords(frame, keywords, column='Description'):
    """Return the rows of ``frame`` whose ``column`` mentions any keyword.

    Matching is a case-insensitive substring search, the same rule
    classify_motive() applies after lowercasing a description. Keywords are
    regex-escaped so they are always treated as literal text, and rows with a
    missing value in ``column`` never match.

    Args:
        frame: DataFrame to filter.
        keywords: Iterable of literal words/phrases to look for.
        column: Name of the text column to search.

    Returns:
        The matching rows of ``frame`` (no rows when ``keywords`` is empty).
    """
    escaped = [re.escape(word) for word in keywords]
    if not escaped:
        # An empty pattern would match every row; no keywords means no matches.
        return frame.iloc[0:0]
    mask = frame[column].str.contains('|'.join(escaped), case=False, regex=True, na=False)
    return frame[mask]


econ_RAND = filter_by_keywords(df_RAND, econ_words)
pol_RAND = filter_by_keywords(df_RAND, pol_words)
rel_RAND = filter_by_keywords(df_RAND, rel_words)

# Only the last bare expression of a cell is rendered, so show all three
# subsets explicitly.
display(econ_RAND, pol_RAND, rel_RAND)

In [None]:
import nltk
from nltk.corpus import stopwords

# Download NLTK's stopword corpus.
# NOTE(review): `stopwords` is imported above but not used in the visible
# cells -- the TF-IDF step passes stop_words='english' to scikit-learn
# instead; confirm whether this download is still needed.
nltk.download('stopwords')

def classify_motive(description):
    """Assign a coarse motive label to a description by keyword lookup.

    The description is lowercased and searched for each keyword (also
    lowercased, so mixed-case entries such as "GDP" can match) as a plain
    substring. The module-level lists ``econ_words``, ``pol_words`` and
    ``rel_words`` are checked in that order.

    When several categories match, the one checked last wins: "Religious"
    overrides "Political", which overrides "Economic".

    Args:
        description: Free-text incident description. Non-string values
            (e.g. NaN for a missing description) are not classified.

    Returns:
        "Economic", "Political" or "Religious", or ``None`` when the input is
        not a string or no keyword is found.
    """
    if not isinstance(description, str):
        return None

    text = description.lower()
    categories = (
        ("Economic", econ_words),
        ("Political", pol_words),
        ("Religious", rel_words),
    )

    motive = None
    for label, keywords in categories:
        # No early exit across categories: a later match replaces an earlier one.
        if any(keyword.lower() in text for keyword in keywords):
            motive = label

    return motive

# Label every incident with a keyword-derived motive. Rows whose description
# is missing or matches no keyword get None.
df_RAND['Motive'] = df_RAND['Description'].apply(classify_motive)

# Display the DataFrame with the new 'Motive' column
df_RAND

In [None]:
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn.metrics import accuracy_score

# Requires df_RAND with a text 'Description' column and the keyword-derived
# 'Motive' column produced by classify_motive().

# Replace missing descriptions with an empty string. The result is assigned
# back because fillna(..., inplace=True) on a column selection is not
# guaranteed to modify the parent DataFrame.
df_RAND['Description'] = df_RAND['Description'].fillna('')

# Only rows that received a keyword label can serve as supervised examples;
# rows with Motive == None cannot be used as training targets.
labeled_mask = df_RAND['Motive'].notna()
X = df_RAND.loc[labeled_mask, 'Description']
y = df_RAND.loc[labeled_mask, 'Motive']

# --- Evaluation -------------------------------------------------------------
# Split BEFORE fitting anything so that neither the TF-IDF vocabulary nor the
# classifier ever sees the held-out rows; otherwise the accuracy is inflated.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

eval_vectorizer = TfidfVectorizer(stop_words='english')
eval_classifier = MultinomialNB()
eval_classifier.fit(eval_vectorizer.fit_transform(X_train), y_train)

X_test_tfidf = eval_vectorizer.transform(X_test)
y_pred = eval_classifier.predict(X_test_tfidf)
accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy: {accuracy}")

# --- Final model --------------------------------------------------------------
# Refit on every labeled row, then predict a motive for all incidents,
# including those the keyword rules left unlabeled.
tfidf_vectorizer = TfidfVectorizer(stop_words='english')
X_tfidf = tfidf_vectorizer.fit_transform(X)

classifier = MultinomialNB()
classifier.fit(X_tfidf, y)

# Copy so the keyword-derived labels in df_RAND are preserved
results_df = df_RAND.copy()
results_df['Motive'] = classifier.predict(tfidf_vectorizer.transform(df_RAND['Description']))

# Save the results to a new CSV file with the predicted 'Motive' column
results_df.to_csv('classified_attacks.csv', index=False)