In [21]:
# Okay, so we've got about 25 sentiment CSVs covering a number of companies.  Now we need to combine all of them together,
# recording the country, name, symbol, date collected, and sentiments for each company.  We'll worry about the actual stock
# values later, since those will need their own processing to fill in gaps.
import csv
import glob
import os

def collect_sentiment_rows(pattern):
    """Read every CSV matching ``pattern`` and return the usable sentiment rows.

    Each returned row is
    ``[country, name, symbol, date_collected, sentiments, 'Uncalculated', 'Unknown']``.
    The collection date is taken from the file name, i.e. the text between the
    first underscore and the first dot that follows it.

    Files are processed in sorted name order so the combined output is the same
    on every run (``glob`` itself returns matches in arbitrary order).
    """
    rows = []
    files = sorted(glob.glob(pattern))
    print(f'{len(files)} files to process.')
    for file in files:
        print(f'Processing {file}...')
        # Parse the date from the base name only, so an underscore or dot in a
        # directory component of the path cannot corrupt it.
        date_collected = os.path.basename(file).split("_")[1].split('.')[0]
        with open(file, newline='', encoding="utf8") as csvfile:
            reader = csv.reader(csvfile, delimiter=',')
            next(reader, None)  # Skip the header line; tolerate a completely empty file.
            for row in reader:
                # Blank lines come back as [] and truncated lines are too short
                # to hold the four fields we need; neither is usable.
                if len(row) < 4:
                    continue
                country, name, symbol, sentiments = row[:4]
                # Not every stock gave us good sentiment data, or any at all.
                # Those rows are marked 'No data!' and are ignored.
                if sentiments != 'No data!':
                    rows.append([country, name, symbol, date_collected, sentiments, 'Uncalculated', 'Unknown'])
    print('Done!')
    return rows


# First entry is the header row; the data rows follow.
alldata = [['Country','Name','Symbol','Date Collected','Sentiments','Sentiment Score', 'Stock History']]

filedir='FinalProjectData_*.csv'
alldata.extend(collect_sentiment_rows(filedir))

24 files to process.
Processing FinalProjectData_2022-03-22.csv...
Processing FinalProjectData_2022-03-23.csv...
Processing FinalProjectData_2022-03-24.csv...
Processing FinalProjectData_2022-03-25.csv...
Processing FinalProjectData_2022-03-26.csv...
Processing FinalProjectData_2022-03-29.csv...
Processing FinalProjectData_2022-03-30.csv...
Processing FinalProjectData_2022-03-31.csv...
Processing FinalProjectData_2022-04-01.csv...
Processing FinalProjectData_2022-04-02.csv...
Processing FinalProjectData_2022-04-05.csv...
Processing FinalProjectData_2022-04-06.csv...
Processing FinalProjectData_2022-04-07.csv...
Processing FinalProjectData_2022-04-08.csv...
Processing FinalProjectData_2022-04-09.csv...
Processing FinalProjectData_2022-04-12.csv...
Processing FinalProjectData_2022-04-14.csv...
Processing FinalProjectData_2022-04-15.csv...
Processing FinalProjectData_2022-04-16.csv...
Processing FinalProjectData_2022-04-19.csv...
Processing FinalProjectData_2022-04-20.csv...
Processing Fi

In [None]:
# Now that we've got our easy data, let's move to the two important factors: Sentiment Score and Stock History.
# For Stock History, we're going to grab the close value of every stock for the past two years.
#     I know I originally said the past six weeks, but this seems silly.  We have *centuries* of stock data.
#     Why not use more of it?  However, the Yahoo API does not give us a close value for days when the
#     market is not open, such as weekends or holidays.  For those days we'll simply take the average of
#     the last known dates.
# For sentiment score, it's time to calculate it.
#     See the 'Final Project - Sentiment Score' notebook for the training of the model.


In [22]:
#Okay, we've got our dataset!  Let's move to pandas now that we're done iterating and save it locally.
import pandas as pd
# The first entry of alldata is the header row; everything after it is data.
column_names = alldata[0]
data_rows = alldata[1:]
df = pd.DataFrame(data_rows, columns=column_names)
# Keep a local copy on disk, leaving out the DataFrame index.
df.to_csv("FinalProjectDataALL.csv", index=False)
df

Unnamed: 0,Country,Name,Symbol,Date Collected,Sentiments,Sentiment Score,Stock History
0,United States,Tesla,TSLA,2022-03-22,['Tesla opens first European gigafactory in Be...,Uncalculated,Unknown
1,Japan,Toyota,TM,2022-03-22,"['Toyota Pauses Production. Covid-19, Not Chip...",Uncalculated,Unknown
2,Germany,Volkswagen,VOW3.DE,2022-03-22,['U.S. companies pull back from business deali...,Uncalculated,Unknown
3,United States,General Motors,GM,2022-03-22,['Super luxury brands like Lamborghini and Ben...,Uncalculated,Unknown
4,United States,Ford,F,2022-03-22,['Super luxury brands like Lamborghini and Ben...,Uncalculated,Unknown
...,...,...,...,...,...,...,...
628,United States,Lordstown Motors,RIDE,2022-04-23,"[""Analyst on Tesla robotaxis: 'I will believe ...",Uncalculated,Unknown
629,Germany,Sono,SEV,2022-04-23,['Full Year 2021 Corporate Update: Sono Motor...,Uncalculated,Unknown
630,United States,Cenntro Electric Group,CENN,2022-04-23,['Cenntro Electric Group to Host 2021 Year End...,Uncalculated,Unknown
631,Canada,Electra Meccanica,SOLO,2022-04-23,['ElectraMeccanica Appoints Automotive Veteran...,Uncalculated,Unknown
