In [1]:
import json
import os

In [2]:
def get_dataset_path(sub_directory,file_name):
    base_dir = os.getcwd()
    target_dir = os.path.dirname(base_dir)
    file_path = os.path.join(target_dir, sub_directory,file_name)
    return file_path

In [3]:
folder_path = get_dataset_path("MachineLearning","Scores")
file_list = os.listdir(folder_path)

brands = []

for file in file_list:
     if file.endswith('.json'):
        brands.append(file[:-5])

In [4]:
automobiles = brands.copy()

Read brand name sentiment score, date and number of comments data from `Score` folder

In [5]:
combined_scores = []
for brand in brands:
    file_path = get_dataset_path(folder_path,f'{brand}.json')
    scores = []
    
    try:
        with open(file_path, 'r') as json_file:
            json_data = json.load(json_file)
    except FileNotFoundError:
        print(f"JSON file '{json_file_path}' not found.")
    except json.JSONDecodeError as e:
        print(f"Unable to read JSON data: {e}")
        
    for i in range(len(json_data['date'])):
        data = []
        data.append(brand)
        data.append(json_data['date'][i])
        data.append(int(json_data['score'][i]))
        data.append(json_data['comment_number'][i])
        scores.append(data)
    combined_scores.append(scores)

In [6]:
len(combined_scores)

29

Read brand data is combined for comparison

In [7]:
combined_scores[:2]

[[['Alfa Romeo', '2023-07-29', 89, 28],
  ['Alfa Romeo', '2023-08-05', 84, 1],
  ['Alfa Romeo', '2023-08-19', 82, 1],
  ['Alfa Romeo', '2023-08-26', 75, 11],
  ['Alfa Romeo', '2023-09-02', 94, 2],
  ['Alfa Romeo', '2023-09-09', 93, 9],
  ['Alfa Romeo', '2023-09-16', 88, 11],
  ['Alfa Romeo', '2023-09-23', 87, 83],
  ['Alfa Romeo', '2023-09-30', 90, 91],
  ['Alfa Romeo', '2023-10-01', 85, 8],
  ['Alfa Romeo', '2023-10-02', 90, 7],
  ['Alfa Romeo', '2023-10-03', 86, 3],
  ['Alfa Romeo', '2023-10-04', 89, 1],
  ['Alfa Romeo', '2023-10-07', 88, 1]],
 [['Audi', '2023-07-29', 88, 185],
  ['Audi', '2023-08-05', 89, 89],
  ['Audi', '2023-08-12', 90, 72],
  ['Audi', '2023-08-19', 90, 76],
  ['Audi', '2023-08-26', 86, 40],
  ['Audi', '2023-09-02', 89, 33],
  ['Audi', '2023-09-09', 87, 41],
  ['Audi', '2023-09-16', 90, 40],
  ['Audi', '2023-09-23', 87, 115],
  ['Audi', '2023-09-30', 92, 44],
  ['Audi', '2023-10-03', 78, 1],
  ['Audi', '2023-10-06', 89, 6]]]

All data regardless of brand for continuous analysis

In [8]:
all_combined_scores = [brand_list for brand in combined_scores for brand_list in brand]

In [9]:
all_combined_scores[:2]

[['Alfa Romeo', '2023-07-29', 89, 28], ['Alfa Romeo', '2023-08-05', 84, 1]]

All dates commented on the site

In [10]:
dates = []
for combined_score in all_combined_scores:
    dates.append(combined_score[1])

In [11]:
dates[:6]

['2023-07-29',
 '2023-08-05',
 '2023-08-19',
 '2023-08-26',
 '2023-09-02',
 '2023-09-09']

In [12]:
len(dates)

353

Comments may have been made for different brands on the same date.

In [13]:
dates = set(dates)

In [14]:
len(dates)

17

In [15]:
dates

{'2023-07-29',
 '2023-08-05',
 '2023-08-12',
 '2023-08-19',
 '2023-08-26',
 '2023-09-02',
 '2023-09-09',
 '2023-09-16',
 '2023-09-23',
 '2023-09-30',
 '2023-10-01',
 '2023-10-02',
 '2023-10-03',
 '2023-10-04',
 '2023-10-05',
 '2023-10-06',
 '2023-10-07'}

To classify data in sentiment chart,column names

In [16]:
source = []
dates =sorted(list(dates))
dates_and_brand = dates.copy()
dates_and_brand.insert(0,"brand")
source.append(dates_and_brand)

In [17]:
dates

['2023-07-29',
 '2023-08-05',
 '2023-08-12',
 '2023-08-19',
 '2023-08-26',
 '2023-09-02',
 '2023-09-09',
 '2023-09-16',
 '2023-09-23',
 '2023-09-30',
 '2023-10-01',
 '2023-10-02',
 '2023-10-03',
 '2023-10-04',
 '2023-10-05',
 '2023-10-06',
 '2023-10-07']

In [18]:
source

[['brand',
  '2023-07-29',
  '2023-08-05',
  '2023-08-12',
  '2023-08-19',
  '2023-08-26',
  '2023-09-02',
  '2023-09-09',
  '2023-09-16',
  '2023-09-23',
  '2023-09-30',
  '2023-10-01',
  '2023-10-02',
  '2023-10-03',
  '2023-10-04',
  '2023-10-05',
  '2023-10-06',
  '2023-10-07']]

If a comment was made on the relevant date, the score of the comment is added to the date column. If not, 0

In [19]:
dates.insert(0,0)

In [20]:
dates[:2]

[0, '2023-07-29']

In [21]:
for brand_combined_score in combined_scores:
    brand =  [0] * 18
    brand[0]=brand_combined_score[0][0]
    for score in brand_combined_score:
        index = dates.index(score[1]) 
        brand[index] = score[2]
    source.append(brand)

In [22]:
source[1]

['Alfa Romeo', 89, 84, 0, 82, 75, 94, 93, 88, 87, 90, 85, 90, 86, 89, 0, 0, 88]

In [23]:
source[2]

['Audi', 88, 89, 90, 90, 86, 89, 87, 90, 87, 92, 0, 0, 78, 0, 0, 89, 0]

Average score of all comments on non-comment dates for continuous chart.

In [24]:
for brand in source:
    scores = brand[1:]
    if type(brand[2]) is str:
        continue
    else:
        average  = sum(scores)/len(scores)

        for i in range(len(scores)):
            if scores[i] == 0:
                scores[i] = int(average) 
    brand[1:] = scores

In [25]:
source[2]

['Audi', 88, 89, 90, 90, 86, 89, 87, 90, 87, 92, 62, 62, 78, 62, 62, 89, 62]

Save data

In [26]:
def save_data(file, data):
    folder_name = './GraphicData'
    
    if not os.path.exists(folder_name):
        try:
            os.mkdir(folder_name)
            print(f"{folder_name} folder successfully created.")
        except Exception as error:
            print(f"An error occurred: {str(error)}")
    else:
        print(f"{folder_name} folder already exists.")
    
    file_path = os.path.join(folder_name, file)
    try:
        with open(file_path, 'w', encoding="utf-8") as output_file:
            json.dump(data, output_file, indent=2)
        print(f"{file_path} file created.")
    except FileNotFoundError:
        print(f"There was an error in the file path or the directory does not exist.")
    except Exception as e:
        print(f"An error occurred: {str(e)}")

In [28]:
save_data("sentiment_graphic_source.json",source)

./GraphicData folder successfully created.
./GraphicData\sentiment_graphic_source.json file created.


Total number of comments and average sentiment score for all brands

In [29]:
average_scores = []
comment_number = []
for brands in combined_scores:
    score = 0
    number = 0
    for brand in brands:
        score += brand[2]
        number += brand[3]
    average_scores.append(int(score/len(brands)))
    comment_number.append(number)

In [30]:
comment_number[:10]

[257, 742, 485, 14, 1979, 234, 1451, 487, 324, 324]

In [31]:
average_scores[:19]

[87, 87, 87, 92, 86, 88, 90, 88, 89, 88, 87, 85, 88, 91, 90, 90, 85, 88, 89]

In [32]:
save_data("average_score_source.json",average_scores)

./GraphicData folder already exists.
./GraphicData\average_score_source.json file created.


In [33]:
save_data("comment_number_graphic_source.json",comment_number)

./GraphicData folder already exists.
./GraphicData\comment_number_graphic_source.json file created.


In [34]:
save_data("automobiles.json",automobiles)

./GraphicData folder already exists.
./GraphicData\automobiles.json file created.
