# Libraries to use

In [1]:
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium import webdriver
from selenium.webdriver.support.ui import Select

import requests
from bs4 import BeautifulSoup
import re as re
import time
import pandas as pd
import numpy as np

import pymysql
import sqlalchemy as alch
from sqlalchemy import create_engine
from getpass import getpass

import nltk
from nltk.sentiment.vader import SentimentIntensityAnalyzer
from nltk.corpus import stopwords

from wordcloud import WordCloud
from langdetect import detect
from textblob import TextBlob


# Import the Al-Jazeera module and create the DataFrame

In [2]:
import src.aljazeera as aj

In [3]:
# Collect the links to all of the Al Jazeera Ukraine news articles.
lst_aj = aj.get_aljazeera()

In [4]:
# Build a dictionary that pairs each link with its article content.
dict_aj = aj.create_dict_aj(lst_aj)

In [5]:
# Per-article descriptives: counts of the words 'ukraine', 'war', 'russia',
# 'putin' and 'zelensky', plus the sentiment analysis.
df_aj_descr = aj.descriptives(dict_aj)

In [6]:
# DataFrame with the links and articles; the helper transposes the data and
# creates a new index.
df_aj = aj.create_df_aj(dict_aj)

In [7]:
# Attach the descriptive columns to the articles, matching rows on the index.
df_aj_all = df_aj.join(df_aj_descr, how="left")

In [8]:
# Persist the combined Al Jazeera data to a CSV file.
df_aj_all.to_csv("data/aljazeera.csv")

In [9]:
# Preview one randomly chosen row of the combined frame.
df_aj_all.sample(n=1)

Unnamed: 0,link,article,polarity,subjectivity,length,ukraine,war,russia,putin,zelensky
33,https://www.aljazeera.com/news/2023/2/22/all-o...,The lessons of the Ukraine war are still being...,0.11617,0.34764,9164,25,24,21,3,0


# Import the BBC module and create the DataFrame

In [10]:
import src.bbc as bbc

In [11]:
# Collect the links to all of the BBC Ukraine news articles.
lst_bbc_ukr = bbc.get_bbc()

In [12]:
# Build a dictionary that pairs each link with its article content.
dict_article_bbc = bbc.bbc_list(lst_bbc_ukr)

In [13]:
# Per-article descriptives: counts of the words 'ukraine', 'war', 'russia',
# 'putin' and 'zelensky', plus the sentiment analysis.
df_descriptive = bbc.descriptives(dict_article_bbc)

In [14]:
# DataFrame with the links and articles; the helper transposes the data and
# creates a new index.
df_bbc = bbc.create_df_bbc(dict_article_bbc)

In [15]:
# Attach the descriptive columns to the BBC articles, matching rows on the index.
df_bbc_together = df_bbc.join(df_descriptive, how="left")

# Persist the combined BBC data, then preview one randomly chosen row.
df_bbc_together.to_csv("data/bbc.csv")
df_bbc_together.sample(n=1)

Unnamed: 0,link,article,polarity,subjectivity,length,ukraine,war,russia,putin,zelensky
129,https://www.bbc.co.uk/news/uk-england-london-6...,Ukraine war: The first aid videos helping trai...,0.07438,0.2108,3779,11,10,2,0,0


# SQL 

## Connecting to the MySQL server and saving the data in tables

In [16]:
import src.sql_queries as sql

In [17]:
# Start the SQL engine; the call prompts for the MySQL password.
engine = sql.sql_connection()

MySQL password: ········


In [None]:
# Load both DataFrames into SQL. Intended to be run only once.
# NOTE(review): re-running may insert the same rows again — confirm how
# include_into_sql treats tables that already exist.
sql.include_into_sql(df_bbc_together, df_aj_all, engine)

## Queries

### BBC queries

In [19]:
# Read the full `bbc` table back from the database.
df_bbc_sql = pd.read_sql_query(
    """SELECT * FROM bbc """,
    con=engine,
)


In [20]:
# Rebinds df_bbc_sql to the helper's result, so re-running only this cell
# feeds the helper its own previous output.
df_bbc_sql = sql.df_bbc_sql_query(df_bbc_sql, engine)

In [21]:
# Pass the BBC frame through the clearance helper (rebinds df_bbc_sql again).
df_bbc_sql = sql.bbc_df_clearance(df_bbc_sql)

### Al-Jazeera queries

In [22]:
# Read the full `aljazeera` table back from the database.
df_aj_sql = pd.read_sql_query(
    """SELECT * FROM aljazeera """,
    con=engine,
)

In [23]:
# Rebinds df_aj_sql to the helper's result, so re-running only this cell
# feeds the helper its own previous output.
df_aj_sql = sql.df_aj_sql_query(df_aj_sql, engine)

In [24]:
# Pass the Al Jazeera frame through the clearance helper (rebinds df_aj_sql again).
df_aj_sql = sql.aj_df_clearance(df_aj_sql)

In [25]:
# Stack the BBC rows on top of the Al Jazeera rows (row-wise is concat's default).
df_together = pd.concat([df_bbc_sql, df_aj_sql])

In [26]:
# Persist the combined BBC + Al Jazeera frame to a CSV file.
df_together.to_csv("data/journals.csv")

In [27]:
# Display the combined frame as the cell's output.
df_together

Unnamed: 0_level_0,polarity,subjectivity,(ukraine:length)*100,(war:length)*100,(russia:length)*100,(putin:length)*100,(zelensky:length)*100,link
source,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
BBC,0.07627,0.36252,0.3006,0.2147,0.1718,0.0143,0.0286,https://www.bbc.co.uk/news/world-europe-64779505
BBC,0.04446,0.44867,0.2651,0.2121,0.1060,0.0177,0.0000,https://www.bbc.co.uk/news/uk-england-suffolk-...
BBC,0.02919,0.32360,0.2046,0.1508,0.3661,0.0108,0.0215,https://www.bbc.co.uk/news/world-europe-64718740
BBC,0.08788,0.44437,0.1635,0.1635,0.1144,0.0000,0.0000,https://www.bbc.co.uk/news/uk-england-merseysi...
BBC,0.06359,0.23566,0.5420,0.3523,0.0813,0.0000,0.0542,https://www.bbc.co.uk/news/world-europe-64779730
...,...,...,...,...,...,...,...,...
Aljazeera,0.01371,0.23858,0.1345,0.0384,0.3267,0.0769,0.0000,https://www.aljazeera.com/news/2023/1/21/us-de...
Aljazeera,0.07068,0.50042,0.4180,0.1194,0.1393,0.0000,0.0398,https://www.aljazeera.com/news/2023/1/20/no-le...
Aljazeera,0.04567,0.31887,0.1617,0.2611,0.2984,0.0746,0.0124,https://www.aljazeera.com/news/2023/1/20/russi...
Aljazeera,0.04126,0.38432,0.2854,0.1427,0.2854,0.0357,0.0714,https://www.aljazeera.com/news/2023/1/20/russi...
