In [2]:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import NoSuchElementException, TimeoutException
from bs4 import BeautifulSoup
import pandas as pd
import time

# Setting up the Chrome WebDriver
options = webdriver.ChromeOptions()
options.headless = True
driver = webdriver.Chrome(options=options)

data = {'description':[], 'datetime':[], 'link':[]}
for i in range(29):
    url = f"https://www.adl.org/global-search?f[0]=topic%3A28&sort_by=dt_published_at&page={i}"
    driver.get(url)
    driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
    try:
        WebDriverWait(driver, 10).until(EC.visibility_of_all_elements_located((By.CLASS_NAME, "view-content")))
    except TimeoutException:
        print("Not found page: ", url)
        continue
    time.sleep(2)
    # Extract HTML content
    html_content = driver.page_source
    soup = BeautifulSoup(html_content, "html.parser")
    topics = soup.find_all("div", {"class": "global-search__row"})
    for topic in topics:
        description = topic.find("div", {"class": "search-result__description"})
        time_tag = soup.find('time', class_='search-result__date')
        if time_tag:
            datetime_str = time_tag['datetime']
            data['datetime'].append(datetime_str)
        else:
            print("Time tag not found in the HTML.")
        link_tag = soup.find('a', class_='search-result__link')
        if link_tag:
            href = link_tag['href']
            data['link'].append(href)
        else:
            print("Link tag not found in the HTML.")
        data['description'].append(str(description.text.strip()))
        

KeyboardInterrupt: 

In [110]:
df_content = pd.DataFrame(data)

In [111]:
for key in data.keys():
    print(len(data[key]))

561
561
561


In [112]:
df_content

Unnamed: 0,description,datetime,link
0,"The death of Saleh Al-Arouri, a leading Hamas ...",2024-04-17 12:39:10,/resources/blog/vitriolic-reactions-arouris-ki...
1,Cartoons and posts that accuse Israel of steal...,2024-04-17 12:39:10,/resources/blog/vitriolic-reactions-arouris-ki...
2,ADL report on the hateful antisemitic conspira...,2024-04-17 12:39:10,/resources/blog/vitriolic-reactions-arouris-ki...
3,Since the start of the Israel and Hamas confli...,2024-04-17 12:39:10,/resources/blog/vitriolic-reactions-arouris-ki...
4,This parent/family discussion guide helps fami...,2024-04-17 12:39:10,/resources/blog/vitriolic-reactions-arouris-ki...
...,...,...,...
556,"by: Simon Reich | January 01, 2000 Dimensions ...",2013-01-02 11:43:00,/resources/news/adl-and-mel-gibsons-passion-ch...
557,Fifty Years of Forgetting and Remembering by: ...,2013-01-02 11:43:00,/resources/news/adl-and-mel-gibsons-passion-ch...
558,When Jews began arriving in Palestine en masse...,2013-01-02 11:43:00,/resources/news/adl-and-mel-gibsons-passion-ch...
559,Compliance and Confrontation by: Victoria J. B...,2013-01-02 11:43:00,/resources/news/adl-and-mel-gibsons-passion-ch...


In [113]:
df_content['datetime'] = pd.to_datetime(df_content['datetime'])

In [115]:
df_content['date'] = df_content['datetime'].dt.strftime('%Y-%m-%d')

In [116]:
df_content

Unnamed: 0,description,datetime,link,date
0,"The death of Saleh Al-Arouri, a leading Hamas ...",2024-04-17 12:39:10,/resources/blog/vitriolic-reactions-arouris-ki...,2024-04-17
1,Cartoons and posts that accuse Israel of steal...,2024-04-17 12:39:10,/resources/blog/vitriolic-reactions-arouris-ki...,2024-04-17
2,ADL report on the hateful antisemitic conspira...,2024-04-17 12:39:10,/resources/blog/vitriolic-reactions-arouris-ki...,2024-04-17
3,Since the start of the Israel and Hamas confli...,2024-04-17 12:39:10,/resources/blog/vitriolic-reactions-arouris-ki...,2024-04-17
4,This parent/family discussion guide helps fami...,2024-04-17 12:39:10,/resources/blog/vitriolic-reactions-arouris-ki...,2024-04-17
...,...,...,...,...
556,"by: Simon Reich | January 01, 2000 Dimensions ...",2013-01-02 11:43:00,/resources/news/adl-and-mel-gibsons-passion-ch...,2013-01-02
557,Fifty Years of Forgetting and Remembering by: ...,2013-01-02 11:43:00,/resources/news/adl-and-mel-gibsons-passion-ch...,2013-01-02
558,When Jews began arriving in Palestine en masse...,2013-01-02 11:43:00,/resources/news/adl-and-mel-gibsons-passion-ch...,2013-01-02
559,Compliance and Confrontation by: Victoria J. B...,2013-01-02 11:43:00,/resources/news/adl-and-mel-gibsons-passion-ch...,2013-01-02


In [117]:
df_content['date'].value_counts()

date
2024-04-17    20
2018-10-31    20
2013-01-02    20
2014-07-18    20
2015-12-31    20
2016-04-28    20
2016-09-13    20
2017-01-12    20
2017-05-11    20
2017-08-17    20
2017-10-25    20
2018-01-09    20
2018-04-20    20
2018-07-19    20
2018-12-10    20
2022-09-28    20
2019-02-07    20
2019-05-14    20
2019-09-19    20
2019-12-11    20
2020-02-26    20
2020-06-12    20
2020-10-06    20
2021-02-11    20
2021-05-14    20
2021-08-25    20
2021-12-20    20
2022-04-21    20
1997-01-01     1
Name: count, dtype: int64

In [119]:
df_content['year'] = df_content['datetime'].dt.year

In [120]:
df_content['month'] = df_content['datetime'].dt.month

In [121]:
df_content['year'].value_counts()

year
2018    100
2021     80
2019     80
2017     80
2020     60
2022     40
2016     40
2024     20
2015     20
2014     20
2013     20
1997      1
Name: count, dtype: int64

In [122]:
df_content

Unnamed: 0,description,datetime,link,date,year,month
0,"The death of Saleh Al-Arouri, a leading Hamas ...",2024-04-17 12:39:10,/resources/blog/vitriolic-reactions-arouris-ki...,2024-04-17,2024,4
1,Cartoons and posts that accuse Israel of steal...,2024-04-17 12:39:10,/resources/blog/vitriolic-reactions-arouris-ki...,2024-04-17,2024,4
2,ADL report on the hateful antisemitic conspira...,2024-04-17 12:39:10,/resources/blog/vitriolic-reactions-arouris-ki...,2024-04-17,2024,4
3,Since the start of the Israel and Hamas confli...,2024-04-17 12:39:10,/resources/blog/vitriolic-reactions-arouris-ki...,2024-04-17,2024,4
4,This parent/family discussion guide helps fami...,2024-04-17 12:39:10,/resources/blog/vitriolic-reactions-arouris-ki...,2024-04-17,2024,4
...,...,...,...,...,...,...
556,"by: Simon Reich | January 01, 2000 Dimensions ...",2013-01-02 11:43:00,/resources/news/adl-and-mel-gibsons-passion-ch...,2013-01-02,2013,1
557,Fifty Years of Forgetting and Remembering by: ...,2013-01-02 11:43:00,/resources/news/adl-and-mel-gibsons-passion-ch...,2013-01-02,2013,1
558,When Jews began arriving in Palestine en masse...,2013-01-02 11:43:00,/resources/news/adl-and-mel-gibsons-passion-ch...,2013-01-02,2013,1
559,Compliance and Confrontation by: Victoria J. B...,2013-01-02 11:43:00,/resources/news/adl-and-mel-gibsons-passion-ch...,2013-01-02,2013,1


In [123]:
df_content.columns

Index(['description', 'datetime', 'link', 'date', 'year', 'month'], dtype='object')

In [126]:
import pandas as pd
import plotly.graph_objects as go

# Assume df_content is your DataFrame with the data

# Convert the 'datetime' column to datetime format
df_content['datetime'] = pd.to_datetime(df_content['datetime'])

# Calculate the total number of days before and after 07/10/2023
days_before = (pd.to_datetime('2023-07-10') - df_content['datetime'].min()).days
days_after = (df_content['datetime'].max() - pd.to_datetime('2023-07-10')).days

# Calculate the average number of incidents per day before and after 07/10/2023
avg_before = df_content[df_content['datetime'] < '2023-07-10'].shape[0] / days_before
avg_after = df_content[df_content['datetime'] >= '2023-07-10'].shape[0] / days_after

# Create a figure for the plot
fig = go.Figure()

# Add bar charts for the average number of incidents per day before and after the date
fig.add_trace(go.Bar(x=['Before 07/10/2023', 'After 07/10/2023'], y=[avg_before, avg_after]))

# Customize the layout of the plot
fig.update_layout(title='Average number of incidents per day before and after 07/10/2023',
                  xaxis_title='Date',
                  yaxis_title='Average number of incidents per day')

# Display the plot
fig.show()


In [128]:
df_content.to_csv('incid.csv')

In [129]:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import NoSuchElementException, TimeoutException
from bs4 import BeautifulSoup
import pandas as pd
import time

# Setting up the Chrome WebDriver
options = webdriver.ChromeOptions()
options.headless = True
driver = webdriver.Chrome(options=options)

data_USA = {'description':[], 'datetime':[], 'link':[]}
for i in range(43):
    url = f"https://www.adl.org/global-search?f[0]=topic%3A55&sort_by=dt_published_at&page{i}"
    driver.get(url)
    driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
    try:
        WebDriverWait(driver, 10).until(EC.visibility_of_all_elements_located((By.CLASS_NAME, "view-content")))
    except TimeoutException:
        print("Not found page: ", url)
        continue
    time.sleep(2)
    # Extract HTML content
    html_content = driver.page_source
    soup = BeautifulSoup(html_content, "html.parser")
    topics = soup.find_all("div", {"class": "global-search__row"})
    for topic in topics:
        description = topic.find("div", {"class": "search-result__description"})
        time_tag = soup.find('time', class_='search-result__date')
        if time_tag:
            datetime_str = time_tag['datetime']
            data_USA['datetime'].append(datetime_str)
        else:
            print("Time tag not found in the HTML.")
        link_tag = soup.find('a', class_='search-result__link')
        if link_tag:
            href = link_tag['href']
            data_USA['link'].append(href)
        else:
            print("Link tag not found in the HTML.")
        data_USA['description'].append(str(description.text.strip()))
        

In [132]:
df_antisemit_incid_in_USA = pd.DataFrame(data_USA)

In [133]:
df_antisemit_incid_in_USA.head()

Unnamed: 0,description,datetime,link
0,Un pic massif a été enregistré après le 7 octo...,2024-04-19 12:14:47,/resources/press-release/les-incidents-antisem...
1,Massiver Anstieg nach dem 7. Oktober; Campus-V...,2024-04-19 12:14:47,/resources/press-release/les-incidents-antisem...
2,Se registró un aumento masivo después del 7 de...,2024-04-19 12:14:47,/resources/press-release/les-incidents-antisem...
3,"In 2023, ADL tabulated 8,873 antisemitic incid...",2024-04-19 12:14:47,/resources/press-release/les-incidents-antisem...
4,Un nuevo Boletín de Calificaciones muestra que...,2024-04-19 12:14:47,/resources/press-release/les-incidents-antisem...


In [134]:
df_antisemit_incid_in_USA

Unnamed: 0,description,datetime,link
0,Un pic massif a été enregistré après le 7 octo...,2024-04-19 12:14:47,/resources/press-release/les-incidents-antisem...
1,Massiver Anstieg nach dem 7. Oktober; Campus-V...,2024-04-19 12:14:47,/resources/press-release/les-incidents-antisem...
2,Se registró un aumento masivo después del 7 de...,2024-04-19 12:14:47,/resources/press-release/les-incidents-antisem...
3,"In 2023, ADL tabulated 8,873 antisemitic incid...",2024-04-19 12:14:47,/resources/press-release/les-incidents-antisem...
4,Un nuevo Boletín de Calificaciones muestra que...,2024-04-19 12:14:47,/resources/press-release/les-incidents-antisem...
...,...,...,...
855,En los tres meses transcurridos desde el 7 de ...,2024-04-19 12:14:47,/resources/press-release/les-incidents-antisem...
856,Dear University President: Enforce Codes of Co...,2024-04-19 12:14:47,/resources/press-release/les-incidents-antisem...
857,An analysis of the campus climate for Jewish s...,2024-04-19 12:14:47,/resources/press-release/les-incidents-antisem...
858,This report documents and analyzes the nature ...,2024-04-19 12:14:47,/resources/press-release/les-incidents-antisem...


In [136]:
df_antisemit_incid_in_USA['datetime'] = pd.to_datetime(df_content['datetime'])
df_antisemit_incid_in_USA['date'] = df_antisemit_incid_in_USA['datetime'].dt.strftime('%Y-%m-%d')
df_antisemit_incid_in_USA['year'] = df_antisemit_incid_in_USA['datetime'].dt.year
df_antisemit_incid_in_USA['month'] = df_antisemit_incid_in_USA['datetime'].dt.month

In [137]:
df_antisemit_incid_in_USA.to_csv('incid_USA.csv')

In [None]:
#Extremism dataset

In [None]:
# link for extrm ::: https://www.adl.org/global-search?f[0]=topic%3A52&f[1]=topic%3A7237&sort_by=dt_published_at&page=1

In [138]:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import NoSuchElementException, TimeoutException
from bs4 import BeautifulSoup
import pandas as pd
import time

# Setting up the Chrome WebDriver
options = webdriver.ChromeOptions()
options.headless = True
driver = webdriver.Chrome(options=options)

data_extr = {'description':[], 'datetime':[], 'link':[]}
for i in range(6):
    url = f"https://www.adl.org/global-search?f[0]=topic%3A52&f[1]=topic%3A7237&sort_by=dt_published_at&page={i}"
    driver.get(url)
    driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
    try:
        WebDriverWait(driver, 10).until(EC.visibility_of_all_elements_located((By.CLASS_NAME, "view-content")))
    except TimeoutException:
        print("Not found page: ", url)
        continue
    time.sleep(2)
    # Extract HTML content
    html_content = driver.page_source
    soup = BeautifulSoup(html_content, "html.parser")
    topics = soup.find_all("div", {"class": "global-search__row"})
    for topic in topics:
        description = topic.find("div", {"class": "search-result__description"})
        time_tag = soup.find('time', class_='search-result__date')
        if time_tag:
            datetime_str = time_tag['datetime']
            data_extr['datetime'].append(datetime_str)
        else:
            print("Time tag not found in the HTML.")
        link_tag = soup.find('a', class_='search-result__link')
        if link_tag:
            href = link_tag['href']
            data_extr['link'].append(href)
        else:
            print("Link tag not found in the HTML.")
        data_extr['description'].append(str(description.text.strip()))
        

In [139]:
df_extr = pd.DataFrame(data_extr)

In [140]:
df_extr['datetime'] = pd.to_datetime(df_extr['datetime'])
df_extr['date'] = df_extr['datetime'].dt.strftime('%Y-%m-%d')
df_extr['year'] = df_extr['datetime'].dt.year
df_extr['month'] = df_extr['datetime'].dt.month

In [141]:
df_extr

Unnamed: 0,description,datetime,link,date,year,month
0,Anti-Zionist student groups on more than 50 U....,2024-04-22 05:39:14,https://www.adl.org/resources/blog/campus-anti...,2024-04-22,2024,4
1,Anti-Zionist activists in the U.S. praised Ira...,2024-04-22 05:39:14,https://www.adl.org/resources/blog/campus-anti...,2024-04-22,2024,4
2,"Over the weekend of April 5, 2024, anti-Israel...",2024-04-22 05:39:14,https://www.adl.org/resources/blog/campus-anti...,2024-04-22,2024,4
3,CW: This piece contains reference to rape and ...,2024-04-22 05:39:14,https://www.adl.org/resources/blog/campus-anti...,2024-04-22,2024,4
4,Daniel Haqiqatjou is an antisemite and conspir...,2024-04-22 05:39:14,https://www.adl.org/resources/blog/campus-anti...,2024-04-22,2024,4
...,...,...,...,...,...,...
111,"Nakba (“catastrophe” in Arabic) Day, held on M...",2022-07-15 01:18:59,https://www.adl.org/resources/blog/jisr-collec...,2022-07-15,2022,7
112,After news broke of the deadly white supremaci...,2022-07-15 01:18:59,https://www.adl.org/resources/blog/jisr-collec...,2022-07-15,2022,7
113,The ADL Center on Extremism has found remarkab...,2022-07-15 01:18:59,https://www.adl.org/resources/blog/jisr-collec...,2022-07-15,2022,7
114,"In January 2022, the white supremacist Rise Ab...",2022-07-15 01:18:59,https://www.adl.org/resources/blog/jisr-collec...,2022-07-15,2022,7


In [None]:
# link for antiisr contenthttps://www.adl.org/global-search?sort_by=dt_published_at&f[0]=topic%3A16294&page=1

In [148]:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import NoSuchElementException, TimeoutException
from bs4 import BeautifulSoup
import pandas as pd
import time

# Setting up the Chrome WebDriver
options = webdriver.ChromeOptions()
options.headless = True
driver = webdriver.Chrome(options=options)

data_anti_isr_cont = {'description':[], 'datetime':[], 'link':[]}
for i in range(16):
    url = f"https://www.adl.org/global-search?sort_by=dt_published_at&f[0]=topic%3A16294&page={i}"
    driver.get(url)
    driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
    try:
        WebDriverWait(driver, 10).until(EC.visibility_of_all_elements_located((By.CLASS_NAME, "view-content")))
    except TimeoutException:
        print("Not found page: ", url)
        continue
    time.sleep(2)
    # Extract HTML content
    html_content = driver.page_source
    soup = BeautifulSoup(html_content, "html.parser")
    topics = soup.find_all("div", {"class": "global-search__row"})
    for topic in topics:
        description = topic.find("div", {"class": "search-result__description"})
        time_tag = soup.find('time', class_='search-result__date')
        if time_tag:
            datetime_str = time_tag['datetime']
            data_anti_isr_cont['datetime'].append(datetime_str)
        else:
            print("Time tag not found in the HTML.")
        link_tag = soup.find('a', class_='search-result__link')
        if link_tag:
            href = link_tag['href']
            data_anti_isr_cont['link'].append(href)
        else:
            print("Link tag not found in the HTML.")
        data_anti_isr_cont['description'].append(str(description.text.strip()))
        

Not found page:  https://www.adl.org/global-search?sort_by=dt_published_at&f[0]=topic%3A16294&page=15


In [149]:
df_antiisr = pd.DataFrame(data_anti_isr_cont)

In [150]:
df_antiisr['datetime'] = pd.to_datetime(df_antiisr['datetime'])
df_antiisr['date'] = df_antiisr['datetime'].dt.strftime('%Y-%m-%d')
df_antiisr['year'] = df_antiisr['datetime'].dt.year
df_antiisr['month'] = df_antiisr['datetime'].dt.month

In [151]:
df_antiisr

Unnamed: 0,description,datetime,link,date,year,month
0,"The death of Saleh Al-Arouri, a leading Hamas ...",2024-04-17 12:39:10,/resources/blog/vitriolic-reactions-arouris-ki...,2024-04-17,2024,4
1,Anti-Zionist activists in the U.S. praised Ira...,2024-04-17 12:39:10,/resources/blog/vitriolic-reactions-arouris-ki...,2024-04-17,2024,4
2,"Friday, March 8, 2024, marked the annual comme...",2024-04-17 12:39:10,/resources/blog/vitriolic-reactions-arouris-ki...,2024-04-17,2024,4
3,"UPDATE: April 26, 2024 Explicit support for re...",2024-04-17 12:39:10,/resources/blog/vitriolic-reactions-arouris-ki...,2024-04-17,2024,4
4,"In recent years, activists have harnessed the ...",2024-04-17 12:39:10,/resources/blog/vitriolic-reactions-arouris-ki...,2024-04-17,2024,4
...,...,...,...,...,...,...
291,"Cynthia McKinney, a former U.S. Congresswoman ...",2013-04-15 11:36:00,/resources/profile/osama-siblani-arab-american...,2013-04-15,2013,4
292,"August 09, 2002 An e-mail message claims that ...",2013-04-15 11:36:00,/resources/profile/osama-siblani-arab-american...,2013-04-15,2013,4
293,"August 31, 2001 By Wayne Firestone \nDirector ...",2013-04-15 11:36:00,/resources/profile/osama-siblani-arab-american...,2013-04-15,2013,4
294,"August 09, 2001 By Abraham H. Foxman \n \nThis...",2013-04-15 11:36:00,/resources/profile/osama-siblani-arab-american...,2013-04-15,2013,4


In [152]:
dfs = [df_content, df_antisemit_incid_in_USA, df_extr, df_antiisr]

# Concatenate vertically
merged_df = pd.concat(dfs, ignore_index=True)

In [153]:
merged_df

Unnamed: 0,description,datetime,link,date,year,month
0,"The death of Saleh Al-Arouri, a leading Hamas ...",2024-04-17 12:39:10,/resources/blog/vitriolic-reactions-arouris-ki...,2024-04-17,2024.0,4.0
1,Cartoons and posts that accuse Israel of steal...,2024-04-17 12:39:10,/resources/blog/vitriolic-reactions-arouris-ki...,2024-04-17,2024.0,4.0
2,ADL report on the hateful antisemitic conspira...,2024-04-17 12:39:10,/resources/blog/vitriolic-reactions-arouris-ki...,2024-04-17,2024.0,4.0
3,Since the start of the Israel and Hamas confli...,2024-04-17 12:39:10,/resources/blog/vitriolic-reactions-arouris-ki...,2024-04-17,2024.0,4.0
4,This parent/family discussion guide helps fami...,2024-04-17 12:39:10,/resources/blog/vitriolic-reactions-arouris-ki...,2024-04-17,2024.0,4.0
...,...,...,...,...,...,...
1828,"Cynthia McKinney, a former U.S. Congresswoman ...",2013-04-15 11:36:00,/resources/profile/osama-siblani-arab-american...,2013-04-15,2013.0,4.0
1829,"August 09, 2002 An e-mail message claims that ...",2013-04-15 11:36:00,/resources/profile/osama-siblani-arab-american...,2013-04-15,2013.0,4.0
1830,"August 31, 2001 By Wayne Firestone \nDirector ...",2013-04-15 11:36:00,/resources/profile/osama-siblani-arab-american...,2013-04-15,2013.0,4.0
1831,"August 09, 2001 By Abraham H. Foxman \n \nThis...",2013-04-15 11:36:00,/resources/profile/osama-siblani-arab-american...,2013-04-15,2013.0,4.0


In [154]:
merged_df = merged_df.drop_duplicates()

In [155]:
merged_df

Unnamed: 0,description,datetime,link,date,year,month
0,"The death of Saleh Al-Arouri, a leading Hamas ...",2024-04-17 12:39:10,/resources/blog/vitriolic-reactions-arouris-ki...,2024-04-17,2024.0,4.0
1,Cartoons and posts that accuse Israel of steal...,2024-04-17 12:39:10,/resources/blog/vitriolic-reactions-arouris-ki...,2024-04-17,2024.0,4.0
2,ADL report on the hateful antisemitic conspira...,2024-04-17 12:39:10,/resources/blog/vitriolic-reactions-arouris-ki...,2024-04-17,2024.0,4.0
3,Since the start of the Israel and Hamas confli...,2024-04-17 12:39:10,/resources/blog/vitriolic-reactions-arouris-ki...,2024-04-17,2024.0,4.0
4,This parent/family discussion guide helps fami...,2024-04-17 12:39:10,/resources/blog/vitriolic-reactions-arouris-ki...,2024-04-17,2024.0,4.0
...,...,...,...,...,...,...
1828,"Cynthia McKinney, a former U.S. Congresswoman ...",2013-04-15 11:36:00,/resources/profile/osama-siblani-arab-american...,2013-04-15,2013.0,4.0
1829,"August 09, 2002 An e-mail message claims that ...",2013-04-15 11:36:00,/resources/profile/osama-siblani-arab-american...,2013-04-15,2013.0,4.0
1830,"August 31, 2001 By Wayne Firestone \nDirector ...",2013-04-15 11:36:00,/resources/profile/osama-siblani-arab-american...,2013-04-15,2013.0,4.0
1831,"August 09, 2001 By Abraham H. Foxman \n \nThis...",2013-04-15 11:36:00,/resources/profile/osama-siblani-arab-american...,2013-04-15,2013.0,4.0


In [156]:
merged_df.to_csv('merged_df.csv')

In [None]:
import pandas as pd
import plotly.graph_objects as go

# Assume df_content is your DataFrame with the data

# Convert the 'datetime' column to datetime format
merged_df['datetime'] = pd.to_datetime(merged_df['datetime'])

# Calculate the total number of days before and after 07/10/2023
days_before = (pd.to_datetime('2023-07-10') - merged_df['datetime'].min()).days
days_after = (merged_df['datetime'].max() - pd.to_datetime('2023-07-10')).days

# Calculate the average number of incidents per day before and after 07/10/2023
avg_before = merged_df[merged_df['datetime'] < '2023-07-10'].shape[0] / days_before
avg_after = merged_df[merged_df['datetime'] >= '2023-07-10'].shape[0] / days_after

# Create a figure for the plot
fig = go.Figure()

# Add bar charts for the average number of incidents per day before and after the date
fig.add_trace(go.Bar(x=['Before 07/10/2023', 'After 07/10/2023'], y=[avg_before, avg_after]))

# Customize the layout of the plot
fig.update_layout(title='Average number of incidents per day before and after 07/10/2023',
                  xaxis_title='Date',
                  yaxis_title='Average number of incidents per day')

# Display the plot
fig.show()


In [None]:
#Scrapping new website #https://antisemitism.org.il/antisemitic-incidents/

In [3]:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import NoSuchElementException, TimeoutException
from bs4 import BeautifulSoup
import pandas as pd
import time


# Запускаем веб-драйвер Chrome
driver = webdriver.Chrome()  # Укажите путь к драйверу, если он не в PATH

# Открываем страницу
driver.get("https://antisemitism.org.il/antisemitic-incidents/")

# Ждем некоторое время перед началом прокрутки и нажатия кнопок
time.sleep(2)


# Прокручиваем страницу вниз и нажимаем на кнопку "Load more" 10 раз
for i in range(40):
    load_button = WebDriverWait(driver, 30).until(EC.element_to_be_clickable((By.CLASS_NAME, "ee-load-button__trigger")))
    driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
    load_button.click()
    time.sleep(10)  # Ждем некоторое время после каждого нажатия

# Получаем весь контент после загрузки
page_content_inc = driver.page_source
soup_inc = BeautifulSoup(page_content_inc, "html.parser")
# Закрываем браузер
driver.quit()


KeyboardInterrupt: 

In [None]:
# soup_inc

In [214]:
incidents = soup_inc.find_all('div',{'class':'elementor-template'})

In [215]:
data_inc = {'title':[], 'date':[], 'link':[]}

for incident in incidents:
    date = incident.find('time').text
    title = incident.find('h3', class_='elementor-heading-title').text.strip()
    link = incident.find('h3', class_='elementor-heading-title').find('a')['href']
    
    data_inc['title'].append(title)
    data_inc['date'].append(date)
    data_inc['link'].append(link)
    

In [216]:
df_inc_ss = pd.DataFrame(data_inc)

In [217]:
df_inc_ss

Unnamed: 0,title,date,link
0,UK – Jewish man abused on London Tube,"March 11, 2024",https://antisemitism.org.il/2024/03/11/uk-jewi...
1,Canada – Pro-Palestinian protesters prevent at...,"March 6, 2024",https://antisemitism.org.il/2024/03/06/canada-...
2,UK – Newly elected British MP Galloway equates...,"March 6, 2024",https://antisemitism.org.il/2024/03/06/uk-newl...
3,Switzerland – Israel flag set on fire in front...,"March 4, 2024",https://antisemitism.org.il/2024/03/04/switzer...
4,Italy – A billboard with antisemitic message i...,"March 4, 2024",https://antisemitism.org.il/2024/03/04/italy-a...
...,...,...,...
3895,USA – Jewish man attacked with BB gun in Brooklyn,"October 29, 2021",https://antisemitism.org.il/2021/10/29/usa-jew...
3896,USA – Maine man was evicted from apartment aft...,"October 29, 2021",https://antisemitism.org.il/2021/10/29/usa-mai...
3897,USA – Antisemitic graffiti at Louisiana State ...,"October 29, 2021",https://antisemitism.org.il/2021/10/29/usa-ant...
3898,"USA – Newburgh, NY council censures councillor...","October 29, 2021",https://antisemitism.org.il/2021/10/29/usa-new...


In [218]:
df_inc_ss['date'] = pd.to_datetime(df_inc_ss['date'])

In [219]:
df_inc_ss['date'].min()

Timestamp('2021-10-28 00:00:00')

In [220]:
df_inc_ss['date'].max()

Timestamp('2024-03-11 00:00:00')

In [224]:
df_inc_ss['date'] = pd.to_datetime(df_inc_ss['date'])
df_inc_ss['year'] = df_inc_ss['date'].dt.year
df_inc_ss['month'] = df_inc_ss['date'].dt.month

In [229]:
country = []
for el in df_inc_ss['title']:
    el_c = el.split('–')[0].strip()
    country.append(el_c)

In [231]:
df_inc_ss['country'] = country

In [232]:
df_inc_ss

Unnamed: 0,title,date,link,year,month,country
0,UK – Jewish man abused on London Tube,2024-04-17,https://antisemitism.org.il/2024/03/11/uk-jewi...,2024.0,4.0,UK
1,Canada – Pro-Palestinian protesters prevent at...,2024-04-17,https://antisemitism.org.il/2024/03/06/canada-...,2024.0,4.0,Canada
2,UK – Newly elected British MP Galloway equates...,2024-04-17,https://antisemitism.org.il/2024/03/06/uk-newl...,2024.0,4.0,UK
3,Switzerland – Israel flag set on fire in front...,2024-04-17,https://antisemitism.org.il/2024/03/04/switzer...,2024.0,4.0,Switzerland
4,Italy – A billboard with antisemitic message i...,2024-04-17,https://antisemitism.org.il/2024/03/04/italy-a...,2024.0,4.0,Italy
...,...,...,...,...,...,...
3895,USA – Jewish man attacked with BB gun in Brooklyn,NaT,https://antisemitism.org.il/2021/10/29/usa-jew...,,,USA
3896,USA – Maine man was evicted from apartment aft...,NaT,https://antisemitism.org.il/2021/10/29/usa-mai...,,,USA
3897,USA – Antisemitic graffiti at Louisiana State ...,NaT,https://antisemitism.org.il/2021/10/29/usa-ant...,,,USA
3898,"USA – Newburgh, NY council censures councillor...",NaT,https://antisemitism.org.il/2021/10/29/usa-new...,,,USA


In [240]:
inc_by_country = df_inc_ss.groupby('country')['country'].value_counts().reset_index()

In [252]:
top_10_values = inc_by_country.nlargest(10, 'count')

In [253]:
top_10_values

Unnamed: 0,country,count
198,USA,1712
78,Germany,387
192,UK,341
66,France,178
106,Italy,147
39,Canada,131
20,Australia,109
151,Russia,68
168,Spain,68
200,Ukraine,52


In [254]:
df_inc_ss.to_csv('Incidents_data_final.csv')

Another way to scrap faster
--

In [20]:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import NoSuchElementException, TimeoutException
from bs4 import BeautifulSoup
import pandas as pd
import time

# Setting up the Chrome WebDriver
options = webdriver.ChromeOptions()
options.headless = True  # Set headless mode to True to run without opening a browser window
driver = webdriver.Chrome(options=options)

data_inc = {'title': [], 'date': [], 'link': []}
for i in range(1, 153):
    url = f"https://antisemitism.org.il/international-reports/?_sft_category=antisemitic-incidents&post_date=01012021+28042024&sf_paged={i}"
    driver.get(url)
    
    # Wait for the page to load by waiting for a specific element that's always present
    try:
        WebDriverWait(driver, 10).until(EC.presence_of_element_located((By.TAG_NAME, "body")))  # Simple condition
    except TimeoutException:
        print("Timed out waiting for page to load: ", url)
        continue
    
    # Scroll to the bottom of the page (if needed)
    driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
    time.sleep(2)  # Allow extra time for any lazy-loaded elements
    
    # Extract HTML content
    html_content = driver.page_source
    soup = BeautifulSoup(html_content, "html.parser")
    articles = soup.find_all('div', class_='elementor-post__text')
    for article in articles:
        title_element = article.find('h3', class_='elementor-post__title')
        date_element = article.find('span', class_='elementor-post-date')
        if title_element and date_element:
            title = title_element.get_text(strip=True)
            date = date_element.get_text(strip=True)
            link = title_element.find('a')['href'] if title_element.find('a') else None
            data_inc['title'].append(title)
            data_inc['date'].append(date)
            data_inc['link'].append(link)

# Close the driver after the loop
driver.quit()

# Convert the collected data to a DataFrame
df_incidents = pd.DataFrame(data_inc)
print(df_incidents.head())  # Display the first few rows of the DataFrame


                                               title            date  \
0              UK – Jewish man abused on London Tube  March 11, 2024   
1  Canada – Pro-Palestinian protesters prevent at...   March 6, 2024   
2  UK – Newly elected British MP Galloway equates...   March 6, 2024   
3  Switzerland – Israel flag set on fire in front...   March 4, 2024   
4  Italy – A billboard with antisemitic message i...   March 4, 2024   

                                                link  
0  https://antisemitism.org.il/2024/03/11/uk-jewi...  
1  https://antisemitism.org.il/2024/03/06/canada-...  
2  https://antisemitism.org.il/2024/03/06/uk-newl...  
3  https://antisemitism.org.il/2024/03/04/switzer...  
4  https://antisemitism.org.il/2024/03/04/italy-a...  


In [21]:
df_incidents

Unnamed: 0,title,date,link
0,UK – Jewish man abused on London Tube,"March 11, 2024",https://antisemitism.org.il/2024/03/11/uk-jewi...
1,Canada – Pro-Palestinian protesters prevent at...,"March 6, 2024",https://antisemitism.org.il/2024/03/06/canada-...
2,UK – Newly elected British MP Galloway equates...,"March 6, 2024",https://antisemitism.org.il/2024/03/06/uk-newl...
3,Switzerland – Israel flag set on fire in front...,"March 4, 2024",https://antisemitism.org.il/2024/03/04/switzer...
4,Italy – A billboard with antisemitic message i...,"March 4, 2024",https://antisemitism.org.il/2024/03/04/italy-a...
...,...,...,...
4548,"USA – Antisemitic harassment in Ross, CA","January 1, 2021",https://antisemitism.org.il/2021/01/01/antisem...
4549,USA – Hate graffiti found at Missoula trail site,"January 1, 2021",https://antisemitism.org.il/2021/01/01/hate-gr...
4550,Spain – Antisemitic graffiti in Barcelona,"January 1, 2021",https://antisemitism.org.il/2021/01/01/antisem...
4551,USA – Fire at Portland synagogue set intention...,"January 1, 2021",https://antisemitism.org.il/2021/01/01/fire-at...


In [22]:
df_incidents['date'] = pd.to_datetime(df_incidents['date'])
df_incidents['year'] = df_incidents['date'].dt.year
df_incidents['month'] = df_incidents['date'].dt.month

In [23]:
country = []
for el in df_incidents['title']:
    el_c = el.split('–')[0].strip()
    country.append(el_c)
    
df_incidents['country'] = country    

In [25]:
inc_by_country = df_incidents.groupby('country')['country'].value_counts().reset_index()
top_10_values = inc_by_country.nlargest(10, 'count')
top_10_values

Unnamed: 0,country,count
442,USA,1838
197,Germany,438
435,UK,371
180,France,213
240,Italy,160
136,Canada,151
104,Australia,121
375,Spain,71
444,Ukraine,70
351,Russia,68


In [26]:
df_incidents['date'].min()

Timestamp('2021-01-01 00:00:00')

In [27]:
df_incidents['date'].max()

Timestamp('2024-03-11 00:00:00')

In [29]:
df_incidents.to_csv('Incidents_data_final.csv')