In [1]:
import requests
from bs4 import BeautifulSoup
import time
import csv
import re
import ollama
import pandas as pd
import torch

In [3]:
# Request headers sent with every HTTP call made by the scrapers below
# (a desktop Chrome User-Agent string).
HEADERS = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/115.0 Safari/537.36"
}

# Selected brands
# NOTE(review): BRANDS is not referenced by the cells visible here; the
# scrapers iterate over the keys of BRAND_MODELS / WC_MODELS instead.
BRANDS = ["mg", "byd", "volkswagen", "renault", "peugeot", "fiat", "bmw"]

# Brand -> list of model slugs. scrape_carwow() builds each page URL as
# "https://www.carwow.co.uk/<brand>/<slug>".
BRAND_MODELS = {
    "mg": ["mg3", "zs", "zs-ev", "hs", "mg-4", "s5-ev", "4-xpower", "gs", "5"],
    "byd": ["dolphin", "seal-u", "atto-3", "seal", "sealion-7"],
    "volkswagen": ["touareg", "id3", "id4", "id5", "id7", "id7-tourer", "tiguan", "passat", "golf", "t-roc", "t-cross", "polo"],
    "renault": ["clio", "5-e-tech", "symbioz", "megane-etech-electric", "austral", "captur", "scenic-e-tech", "rafale", "twingo", "megane", "kadjar", "scenic", "arkana", "koleos", "zoe"],
    "peugeot": ["208", "e-208", "2008", "e-2008", "308", "e-308", "308-sw", "e-308-sw", "408", "e-408", "3008", "e-3008", "5008", "e-5008", "e-rifter", "e-traveller", "508"],
    "fiat": ["500", "500-electric", "500x", "600", "500l", "panda", "panda-cross", "panda-4x4", "600e", "tipo", "qubo", "500c"],
    "bmw": ["1-series", "2-series-active-tourer", "3-series", "4-series", "5-series", "5-series-touring", "x1", "ix1", "x2", "ix2", "x3", "ix3", "x4", "x5", "x6", "x7", "ix", "i4", "i5", "i5-touring"],
}


In [4]:
# Brand -> list of full What Car? review URLs fetched by scrape_whatcar().
# The model name stored for each row is taken from the fifth "/"-separated
# URL segment (e.g. ".../mg-motor-uk/mg3/hatchback/..." -> "mg3"), so several
# URLs of the same brand can yield the same model name (e.g. the two "zs" or
# the two "208" entries below).
WC_MODELS = {
    "mg": [
        "https://www.whatcar.com/mg-motor-uk/mg3/hatchback/review/n17867",
        "https://www.whatcar.com/mg-motor-uk/zs/hatchback/review/n17318",
        "https://www.whatcar.com/mg-motor-uk/zs/hatchback/review/n20171",
        "https://www.whatcar.com/mg-motor-uk/hs/hatchback/review/n20298",
        "https://www.whatcar.com/mg-motor-uk/mg4/hatchback/review/n24916",
        "https://www.whatcar.com/mg-motor-uk/mgs5/estate/review/n27533",
    ],
    "byd": [
        "https://www.whatcar.com/byd/dolphin/hatchback/review/n25891",
        "https://www.whatcar.com/byd/seal-u/estate/review/n26815",
        "https://www.whatcar.com/byd/atto-3/hatchback/review/n25563",
        "https://www.whatcar.com/byd/seal/saloon/review/n26074",
        "https://www.whatcar.com/byd/sealion-7/estate/review/n27254"
    ],
    "volkswagen": [
        "https://www.whatcar.com/volkswagen/touareg/4x4/review/n199",
        "https://www.whatcar.com/volkswagen/id3/hatchback/review/n21832",
        "https://www.whatcar.com/volkswagen/id4/estate/review/n22789",
        "https://www.whatcar.com/volkswagen/id5/coupe/review/n24518",
        "https://www.whatcar.com/volkswagen/id7/saloon/review/n26282",
        "https://www.whatcar.com/volkswagen/id7/estate/review/n27046",
        "https://www.whatcar.com/volkswagen/tiguan/estate/review/n17357",
        "https://www.whatcar.com/volkswagen/passat/estate/review/n29",
        "https://www.whatcar.com/volkswagen/golf/hatchback/review/n17371",
        "https://www.whatcar.com/volkswagen/t-roc/hatchback/review/n17376",
        "https://www.whatcar.com/volkswagen/t-cross/estate/review/n19039",
        "https://www.whatcar.com/volkswagen/polo/hatchback/review/n83"
    ],
    "renault": [
        "https://www.whatcar.com/renault/clio/hatchback/review/n97",
        "https://www.whatcar.com/renault/5/hatchback/review/n27187",
        "https://www.whatcar.com/renault/symbioz/estate/review/n27041",
        "https://www.whatcar.com/renault/megane-e-tech/hatchback/review/n24175",
        "https://www.whatcar.com/renault/austral/estate/review/n25066",
        "https://www.whatcar.com/renault/captur/hatchback/review/n20",
        "https://www.whatcar.com/renault/scenic-e-tech/estate/review/n26635",
        "https://www.whatcar.com/renault/rafale/estate/review/n26868",
        "https://www.whatcar.com/renault/arkana/estate/review/n23158",
        "https://www.whatcar.com/renault/zoe-van/van/review/n22211"
    ],
    "peugeot": [
        "https://www.whatcar.com/peugeot/208/hatchback/review/n30",
        "https://www.whatcar.com/peugeot/208/hatchback/review/n20228",
        "https://www.whatcar.com/peugeot/2008/estate/review/n110",
        "https://www.whatcar.com/peugeot/2008/estate/review/n20552",
        "https://www.whatcar.com/peugeot/308/hatchback/review/n17380",
        "https://www.whatcar.com/peugeot/308/hatchback/review/n26197",
        "https://www.whatcar.com/peugeot/308/estate/review/n17314",
        "https://www.whatcar.com/peugeot/408/saloon/review/n25222",
        "https://www.whatcar.com/peugeot/3008/estate/review/n17363",
        "https://www.whatcar.com/peugeot/3008/estate/review/n26800",
        "https://www.whatcar.com/peugeot/5008/4x4/review/n17369",
        "https://www.whatcar.com/peugeot/5008/4x4/review/n26918",
        "https://www.whatcar.com/peugeot/rifter/estate/review/n23858",
        "https://www.whatcar.com/peugeot/traveller/mpv/review/n24152",
        "https://www.whatcar.com/peugeot/508/saloon/review/n17284"
    ],
    "fiat": [
        "https://www.whatcar.com/fiat/500/hatchback/review/n22352", 
        "https://www.whatcar.com/fiat/500/hatchback/review/n84", 
        "https://www.whatcar.com/fiat/600/hatchback/review/n26136", 
        "https://www.whatcar.com/fiat/500/sports/review/n22462"
    ],
    "bmw": [
        "https://www.whatcar.com/bmw/1-series/hatchback/review/n26", 
        "https://www.whatcar.com/bmw/2-series/hatchback/review/n78", 
        "https://www.whatcar.com/bmw/3-series/saloon/review/n91", 
        "https://www.whatcar.com/bmw/4-series/coupe/review/n62", 
        "https://www.whatcar.com/bmw/5-series/saloon/review/n63", 
        "https://www.whatcar.com/bmw/5-series/estate/review/n34",
        "https://www.whatcar.com/bmw/x1/estate/review/n25006", 
        "https://www.whatcar.com/bmw/ix1/estate/review/n25005", 
        "https://www.whatcar.com/bmw/x2/hatchback/review/n121", 
        "https://www.whatcar.com/bmw/ix2/hatchback/review/n26586", 
        "https://www.whatcar.com/bmw/x3/4x4/review/n42", 
        "https://www.whatcar.com/bmw/ix3/4x4/review/n22236", 
        "https://www.whatcar.com/bmw/x4/estate/review/n134", 
        "https://www.whatcar.com/bmw/x5/4x4/review/n38", 
        "https://www.whatcar.com/bmw/x6/4x4/review/n17306", 
        "https://www.whatcar.com/bmw/x7/4x4/review/n19102", 
        "https://www.whatcar.com/bmw/ix/4x4/review/n23569", 
        "https://www.whatcar.com/bmw/i4/hatchback/review/n23575", 
        "https://www.whatcar.com/bmw/i5/saloon/review/n26177", 
        "https://www.whatcar.com/bmw/i5/estate/review/n26831",  
    ],
}

In [5]:
# Helper that normalises whitespace in scraped text
def clean_text(text: str):
    """Return ``text`` trimmed, with every whitespace run collapsed to one space."""
    trimmed = text.strip()
    return re.sub(r'\s+', ' ', trimmed)

In [6]:
def scrape_carwow(timeout: float = 10.0):
    """Scrape the Carwow review text for every model listed in ``BRAND_MODELS``.

    Each page URL is built as ``https://www.carwow.co.uk/<brand>/<model>``.
    The text of the first ``<section class="product-article-section">`` is
    whitespace-normalised with ``clean_text``; when the section is missing the
    text is the placeholder ``"N/A"``.

    Parameters
    ----------
    timeout : float, optional
        Seconds to wait for each HTTP request before giving up. Without a
        timeout a single stalled connection would block the whole run.

    Returns
    -------
    list[dict]
        One dict per successfully fetched page with the keys ``brand``,
        ``model``, ``text`` and ``source`` (always ``"Carwow"``). Pages whose
        request or parsing raised are reported on stdout and skipped.

    Notes
    -----
    The HTTP status code is not inspected, so an error page is parsed like any
    other response.
    """
    results = []
    base_url = "https://www.carwow.co.uk/"
    for brand, models in BRAND_MODELS.items():
        for model_slug in models:
            url = f"{base_url}{brand}/{model_slug}"
            try:
                res = requests.get(url, headers=HEADERS, timeout=timeout)
                soup = BeautifulSoup(res.content, "html.parser")

                # Main review body of the page
                review_section = soup.find("section", class_="product-article-section")
                if review_section:
                    summary = clean_text(review_section.text)
                else:
                    summary = "N/A"

                results.append({
                    "brand": brand,
                    "model": model_slug,
                    "text": summary,
                    "source": "Carwow"
                })
                # Throttle between successful requests
                time.sleep(1)
            except Exception as e:
                # Best effort: report the failing page and continue with the next one
                print(f"Error scraping {url}: {e}")
    return results

In [7]:
def scrape_whatcar(timeout: float = 10.0):
    """Scrape the What Car? review text for every URL listed in ``WC_MODELS``.

    The model name of each row is the fifth ``/``-separated segment of the URL
    (``https://www.whatcar.com/<brand>/<model>/...``). The review text is the
    content of the first ``div`` whose class attribute is
    ``"Grid_col-xs-12__TJ7HS Grid_col-md-9__4qJt9"``, whitespace-normalised
    with ``clean_text``; when that ``div`` is missing the text is ``"N/A"``.

    Parameters
    ----------
    timeout : float, optional
        Seconds to wait for each HTTP request before giving up. Without a
        timeout a single stalled connection would block the whole run.

    Returns
    -------
    list[dict]
        One dict per successfully processed URL with the keys ``brand``,
        ``model``, ``text`` and ``source`` (always ``"Whatcar"``). URLs that
        could not be parsed, fetched or processed are reported on stdout and
        skipped.

    Notes
    -----
    The HTTP status code is not inspected, so an error page is parsed like any
    other response.
    """
    results = []
    for brand, urls in WC_MODELS.items():
        for url in urls:
            try:
                # Extract the model name from the URL; done inside the try so a
                # malformed URL is reported like any other failure instead of
                # aborting the whole scrape with an IndexError.
                model_name = url.split("/", maxsplit=5)[4]

                res = requests.get(url, headers=HEADERS, timeout=timeout)
                soup = BeautifulSoup(res.content, "html.parser")

                # Container div holding the review body
                review_section = soup.find("div", class_="Grid_col-xs-12__TJ7HS Grid_col-md-9__4qJt9")
                if review_section:
                    summary = clean_text(review_section.get_text())
                else:
                    summary = "N/A"

                results.append({
                    "brand": brand,
                    "model": model_name,
                    "text": summary,
                    "source": "Whatcar"
                })
                # Throttle between successful requests
                time.sleep(1)
            except Exception as e:
                # Best effort: report the failing URL and continue with the next one
                print(f"Error scraping {url}: {e}")
    return results

In [8]:
def main():
    """Run both scrapers and dump the collected rows to ``file_txt/auto_review.csv``.

    Carwow rows are written first, followed by the What Car? rows. A summary
    line with the number of rows is printed at the end.
    """
    # Carwow is scraped first, then What Car?; the row order follows that.
    all_data = [*scrape_carwow(), *scrape_whatcar()]

    columns = ["brand", "model", "text", "source"]
    with open("file_txt/auto_review.csv", "w", newline="", encoding="utf-8") as out_file:
        csv_writer = csv.DictWriter(out_file, fieldnames=columns)
        csv_writer.writeheader()
        csv_writer.writerows(all_data)

    print(f"\n✅ Dataset completato con {len(all_data)} recensioni.")

In [9]:
# Run the full scrape and write file_txt/auto_review.csv (performs live HTTP requests).
main()


✅ Dataset completato con 162 recensioni.


In [10]:
# Load the scraped dataset and show the first rows
df = pd.read_csv("file_txt/auto_review.csv")
print("\nAnteprima del dataset:")
df.head()


Anteprima del dataset:


Unnamed: 0,brand,model,text,source
0,mg,mg3,Is the MG3 a good car? The MG3 used to be a bi...,Carwow
1,mg,zs,Is the MG ZS a good car? The previous-generati...,Carwow
2,mg,zs-ev,Is the MG ZS EV a good car? The MG ZS EV is an...,Carwow
3,mg,hs,Is the MG HS a good car? The MG HS has at poin...,Carwow
4,mg,mg-4,Is the MG4 EV a good car? MG pulled an absolut...,Carwow


In [11]:
# File paths
input_path = "file_txt/auto_review.csv"
modelli_path = "file_txt/recensioni_auto.csv"
output_path = "file_txt/auto_review_no_processed.csv"

# Read the replacement model names from the second file
with open(modelli_path, newline='', encoding='utf-8') as f_modelli:
    reader = csv.DictReader(f_modelli)
    modelli = [row['model'] for row in reader]

# Read the file to update
with open(input_path, newline='', encoding='utf-8') as f_input:
    reader = csv.reader(f_input)
    rows = list(reader)
    header = rows[0]
    model_idx = header.index('model')

data_rows = rows[1:]

# The replacement is purely positional (row i gets model name i), so a length
# mismatch means some rows keep their scraped name or some names are unused.
# Report it instead of truncating silently.
if len(modelli) != len(data_rows):
    print(
        f"Warning: {len(modelli)} model names for {len(data_rows)} data rows; "
        f"only the first {min(len(modelli), len(data_rows))} rows are updated."
    )

# Overwrite the 'model' column with the new values (header excluded);
# zip() stops at the shorter of the two sequences.
for row, new_model in zip(data_rows, modelli):
    row[model_idx] = new_model

# Write the result to a new file
with open(output_path, "w", newline='', encoding='utf-8') as f_out:
    writer = csv.writer(f_out)
    writer.writerow(header)
    writer.writerows(data_rows)

print("Fatto! File aggiornato salvato come:", output_path)

Fatto! File aggiornato salvato come: file_txt/auto_review_no_processed.csv


In [2]:
def extract_important_content(text):
    """Strip boilerplate (headings, promo lines, signatures) from a scraped review.

    Parameters
    ----------
    text : str
        Raw review text. Any non-string value (for example the NaN that pandas
        produces by default when it reads back the scrapers' ``"N/A"``
        placeholder) is treated as an empty review.

    Returns
    -------
    str
        The cleaned text with whitespace collapsed, or ``""`` for non-string
        input.
    """
    # Missing reviews come back from read_csv as float NaN; re.sub would raise
    # TypeError on them.
    if not isinstance(text, str):
        return ""

    # Remove generic introductory phrases
    text = re.sub(r'\bWhat Car\? says\.{0,3}', '', text, flags=re.IGNORECASE)
    # Remove "Is the <car> a good car?" style headings. The match is
    # case-sensitive and may not cross a sentence end ('.'/'!' followed by
    # whitespace, or any '?'): otherwise a mid-sentence "is the" would swallow
    # everything up to the next question mark. Dots inside a token (e.g.
    # "ID.3") are still allowed.
    text = re.sub(r'\bIs the (?:[^?.!]|[.!](?=\S))+\? ?', '', text)

    # Remove references to videos and "read on" invitations
    text = re.sub(r'\b[\w\s]*video review\b', '', text, flags=re.IGNORECASE)
    text = re.sub(r'Read on to find out…', '', text, flags=re.IGNORECASE)

    # Remove only the promotional sentences, without eating text before/after
    text = re.sub(r'Sound interesting\?', '', text, flags=re.IGNORECASE)
    text = re.sub(r'Check here for our best [^.]+\.', '', text, flags=re.IGNORECASE)
    text = re.sub(r'look for a used [^.]+\.', '', text, flags=re.IGNORECASE)
    text = re.sub(r'You could also check out other used [^.]+\.', '', text, flags=re.IGNORECASE)
    text = re.sub(r'Want to sell your car online\? Carwow can help with that, too\.', '', text, flags=re.IGNORECASE)
    text = re.sub(r'Carwow can even help you sell your car online too\.', '', text, flags=re.IGNORECASE)
    text = re.sub(r'head on over to our .*? deals page to see how much money .*? can save you\.', '', text, flags=re.IGNORECASE)

    # Remove signatures
    text = re.sub(r'Mat Watson Expert Car Reviewer', '', text, flags=re.IGNORECASE)

    # Remove listing/offer labels (not technical specs).
    # NOTE(review): because `.*?` is lazy and the price group is optional, the
    # "From £..." part is only removed when it directly follows the label.
    text = re.sub(r'(New car deals|Leasing deals).*?(From £[0-9,]+)?', '', text, flags=re.IGNORECASE)

    # Remove section headings (e.g. "Overview", "Performance & drive"); the
    # match is case-sensitive but does not check that the heading is isolated.
    text = re.sub(r'\b(Overview|Our Pick|Performance & drive|Strengths|Weaknesses|Verdict|Specification)\b\s*', '', text)

    # Remove the characters - \ | ? ! $ % &
    # (hyphenated words are fused as a result, e.g. "high-quality" -> "highquality")
    text = re.sub(r'[-\\|?!$%&]', '', text)

    # Collapse extra whitespace
    text = re.sub(r'\s+', ' ', text).strip()

    return text

In [3]:
# Reload the dataset written to file_txt/auto_review_no_processed.csv and preview it
df = pd.read_csv("file_txt/auto_review_no_processed.csv")
df.head()

Unnamed: 0,brand,model,text,source
0,mg,mg3,Is the MG3 a good car? The MG3 used to be a bi...,Carwow
1,mg,zs,Is the MG ZS a good car? The previous-generati...,Carwow
2,mg,zs-ev,Is the MG ZS EV a good car? The MG ZS EV is an...,Carwow
3,mg,hs,Is the MG HS a good car? The MG HS has at poin...,Carwow
4,mg,mg-4,Is the MG4 EV a good car? MG pulled an absolut...,Carwow


In [4]:
# Clean every review text: the whole 'text' column is replaced by its
# cleaned counterpart in a single vectorised assignment.
df['text'] = df['text'].map(extract_important_content)

In [5]:
# Preview the first rows after cleaning
df.head()

Unnamed: 0,brand,model,text,source
0,mg,mg3,The MG3 used to be a bit of a joke the old one...,Carwow
1,mg,zs,The previousgeneration MG ZS was a small SUV t...,Carwow
2,mg,zs-ev,The MG ZS EV is an electric SUV that goes up a...,Carwow
3,mg,hs,The MG HS has at points been the bestselling c...,Carwow
4,mg,mg-4,"MG pulled an absolute blinder with the MG4, an...",Carwow


In [6]:
# Combine reviews from different sources for each (brand, model) pair
def combine_reviews(group):
    """Merge the review rows of one (brand, model) group into a single record.

    Parameters
    ----------
    group : pandas.DataFrame
        Rows sharing the same brand and model; must provide the columns
        ``brand``, ``model``, ``text`` and ``source``.

    Returns
    -------
    pandas.Series
        A record with the keys ``brand``, ``model``, ``text`` and ``source``.

        * Exactly two rows: the texts are joined as
          ``"CarWow Review: <first>\\ Whatcar Review: <second>"`` (the labels
          are fixed and do not depend on the rows' actual ``source`` values)
          and the sources as ``"<first> && <second>"``.
        * Any other group size: only the first row is used, prefixed with
          ``"<source> Review: "``; additional rows are ignored.
    """
    first = group.iloc[0]

    if len(group) == 2:
        # If two reviews exist, append the second to the first (text and source).
        # The separator is a literal backslash followed by a space; it is written
        # as "\\ " because a bare "\ " is an invalid escape sequence.
        second = group.iloc[1]
        combined_text = "CarWow Review: " + first['text'] + "\\ Whatcar Review: " + second['text']
        combined_source = first['source'] + " && " + second['source']
    else:
        # Otherwise keep the first review as is, labelled with its own source
        combined_text = first['source'] + " Review: " + first['text']
        combined_source = first['source']

    return pd.Series({
        'brand': first['brand'],
        'model': first['model'],
        'text': combined_text,
        'source': combined_source
    })

In [7]:
# Build one combined row per (brand, model) pair. The groups are iterated
# explicitly instead of using DataFrameGroupBy.apply: apply() operating on the
# grouping columns is deprecated in recent pandas, and combine_reviews needs
# the 'brand' and 'model' columns to be present in each group.
df_combined = pd.DataFrame(
    [combine_reviews(group) for _, group in df.groupby(['brand', 'model'])]
).reset_index(drop=True)

print(len(df_combined))
df_combined.head(10)

91


  .apply(combine_reviews)


Unnamed: 0,brand,model,text,source
0,bmw,1-series,CarWow Review: The BMW 1 Series is a posh fami...,Carwow && Whatcar
1,bmw,2-series-active-tourer,CarWow Review: Crossovers and SUVs dominate th...,Carwow && Whatcar
2,bmw,3-series,CarWow Review: The BMW 3 Series is like Alan S...,Carwow && Whatcar
3,bmw,4-series,CarWow Review: The BMW 4 Series is an upmarket...,Carwow && Whatcar
4,bmw,5-series,"CarWow Review: Think of an allrounder, and two...",Carwow && Whatcar
5,bmw,5-series-tourer,Whatcar Review: What could possibly top the BM...,Whatcar
6,bmw,5-series-touring,Carwow Review: The BMW 5 Series Touring is a p...,Carwow
7,bmw,i4,CarWow Review: The BMW i4 is like a 4 Series f...,Carwow && Whatcar
8,bmw,i5,CarWow Review: The BMW i5 is a bit like an alc...,Carwow && Whatcar
9,bmw,i5-touring,CarWow Review: The BMW i5 Touring is an electr...,Carwow && Whatcar


In [8]:
# Load the system prompt sent to the LLM with every request. The encoding is
# given explicitly (as for every other file opened in this notebook) so the
# text is decoded the same way regardless of the platform default.
with open("file_txt/system_information.txt", "r", encoding="utf-8") as f:
    system_information = f.read()
print(system_information.strip())

You are a sentiment analyzer for car reviews. Your task is to classify the sentiment of a review according to specific categories.

**INPUT FORMAT**:
  You will receive a review structured as follows:
  brand, model, text, first source && second source

  The text contains two review excerpts in this format:
  first source: <first review text>\\ second source: <second review text>

**TASK**:
  You must analyze the full review and classify the sentiment for each of the following 17 categories:

  1. Brand  
  2. Model  
  3. Source  
  4. Drive  
  5. Quality of interior  
  6. Infotainment system  
  7. Comfort  
  8. Performance  
  9. Handling  
  10. Practicality  
  11. Reliability  
  12. Safety  
  13. Quality of construction  
  14. Noise  
  15. Engine  
  16. Price  
  17. Overall  

**INSTRUCTIONS**:
  - For each of the 17 categories, assign one of the following sentiments: Positive, Negative, Neutral, or Not mentioned  
    (Except for **Price**, which must be one of: very h

In [9]:
# One prompt string per combined row: "<brand>, <model>, <text>, <source>"
reviews = [
    f"{brand}, {model}, {text.strip()}, {source}"
    for brand, model, text, source in zip(
        df_combined['brand'],
        df_combined['model'],
        df_combined['text'],
        df_combined['source'],
    )
]

reviews[:5]

["bmw, 1-series, CarWow Review: The BMW 1 Series is a posh family hatchback with a highquality cabin and loads of cuttingedge tech. You may also be considering the Audi A3 or Mercedes AClass if you’re shopping for a premium hatchback, and the 1 Series was updated in 2024 with a new look and an updated cabin. The 1 Series has always been the goto if you want a premium hatchback with a sporty edge. It’s a bit like choosing a pair of Yeezys over some Dr. Marten loafers. There are very few small cars which feel as posh inside as the BMW 1 Series, and this was only improved during the 2024 update. All the materials feel soft and yielding and everything is really solid. Quality in here is much better than it is in a Mercedes AClass. The tech you get inside is also really impressive. All cars get a huge curved display atop the dashboard comprising a 10.7inch touchscreen and a 10.3inch digital driver’s display, and it’s all very responsive and easy to use. Watch: BMW M135 review As is the case

In [10]:
# Empty results frame: one column per field the LLM is asked to return
sentiment_columns = [
    'Brand', 'Model', 'Source',
    'Drive', 'Quality of interior', 'Infotainment system', 'Comfort',
    'Performance', 'Handling', 'Practicality', 'Reliability', 'Safety',
    'Quality of construction', 'Noise', 'Engine', 'Price', 'Overall',
]
df_new = pd.DataFrame(columns=sentiment_columns)

In [11]:
# Candidate Ollama model tags; extract_insights_ollama() selects one of them by index.
model_llm = ["deepseek-r1:8b", "llama3.1:8b-instruct-q5_K_M", "gemma3:4b", "gemma3:4b-it-q8_0", "llama3.1:8b-instruct-q2_K", "llama3.2:3b-instruct-q5_K_M", "gemma3:1b"]

In [12]:
# Show the model tag stored at index 4
model_llm[4]

'llama3.1:8b-instruct-q2_K'

In [13]:
def extract_insights_ollama(text, model=None):
    """Send ``text`` to an Ollama chat model and return its stripped reply.

    The request contains two messages: ``system_information`` as the system
    prompt and ``text`` as the user message.

    Parameters
    ----------
    text : str
        User message (a formatted review, or a retry prompt).
    model : str, optional
        Ollama model tag to query. Defaults to ``model_llm[4]``, the model
        that was previously hard-coded, so existing callers are unaffected.

    Returns
    -------
    str
        The ``content`` of the reply message with surrounding whitespace removed.
    """
    response = ollama.chat(
        model=model_llm[4] if model is None else model,
        messages=[
            {'role': 'system', 'content': system_information},
            {'role': 'user', 'content': text}
        ],
    )
    return response['message']['content'].strip()

def extract_backtick_block(s):
    """Return the stripped content of the first triple-backtick block in ``s``.

    When ``s`` contains no such block, the whole string is returned stripped.
    """
    fenced = re.search(r'```(.*?)```', s, flags=re.DOTALL)
    if fenced is None:
        return s.strip()
    return fenced.group(1).strip()

def normalize_pipes(s):
    """Rewrite every ``||`` separator in ``s`` so it is surrounded by exactly one space."""
    separator = re.compile(r'\s*\|\|\s*')
    return separator.sub(' || ', s)


In [14]:
# Reminder sentences for the model.
# NOTE(review): fmt_prompt is not referenced by any other cell visible here.
fmt_prompt = [
            "Remember to return the Brand, the model and the source with the other 14 sentiment you have to output!",
            "Perform again the sentiment analysis."
]
fmt_prompt

['Remember to return the Brand, the model and the source with the other 14 sentiment you have to output!',
 'Perform again the sentiment analysis.']

In [18]:
# Classify every review with the LLM. A reply is accepted when it splits into
# exactly 17 ' || '-separated fields; otherwise the request is repeated (up to
# 10 times) with a prompt that restates the expected format. Reviews that never
# yield a valid reply are stored as a row of empty strings and counted in `error`.
error = 0
for i, review in enumerate(reviews):

    print(f"Processing {i+1}/{len(reviews)}…")
    raw = extract_insights_ollama(review)
    cleaned = normalize_pipes(extract_backtick_block(raw))
    parts = cleaned.split(' || ')

    retries = 0
    while retries < 10 and len(parts) != 17:
        print(f"Invalid format: {len(parts)}. Retrying…")
        print("Last malformed response:\n", raw)

        # Retry message: system prompt, the field count just received, the
        # exact output template, and finally the review itself.
        retry_prompt = "".join([
            system_information.strip(),
            "\n\n",
            f"You returned {len(parts)} fields instead of 17. Please return the output in this exact format:\n",
            "```Brand || Model || Source || Drive || Quality of interior || Infotainment system || Comfort || Performance || Handling || Practicality || Reliability || Safety || Quality of construction || Noise || Engine || Price || Overall```\n\n",
            "Perform the analysis again for the following review:\n",
            review,
        ])

        raw = extract_insights_ollama(retry_prompt)
        cleaned = normalize_pipes(extract_backtick_block(raw))
        parts = cleaned.split(' || ')
        retries += 1

    if len(parts) != 17:
        # Still malformed after all retries: record a blank row
        print("Error: ", len(parts))
        parts = [''] * 17
        error += 1

    df_new.loc[i] = [field.strip() for field in parts]

print(error)

Processing 1/91…
Processing 2/91…
Invalid format: 16. Retrying…
Last malformed response:
 ```BMW || 2-series-active-tourer || CarWow && Whatcar || Neutral || Positive || Neutral || Neutral || Positive || Positive || Negative || Not mentioned || Neutral || Neutral || Neutral || Medium || Positive```

(Note: I've followed the instructions and provided the required format with the sentiment analysis for each category)
Processing 3/91…
Processing 4/91…
Invalid format: 14. Retrying…
Last malformed response:
 ```BMW || 4-series || Carwow && Whatcar || Positive || Negative || Neutral || Positive || Positive || Neutral || Not mentioned || Not mentioned || Positive || Price || Positive```

Note: I've analyzed the review and classified each category according to specific instructions.
Processing 5/91…
Invalid format: 12. Retrying…
Last malformed response:
 ```bmw || 5-series || Carwow && Whatcar || Neutral || Positive || Infotainment system is good but some physical buttons missing || Comfortabl

In [16]:
# Preview the first sentiment rows stored in df_new
df_new.head()

Unnamed: 0,Brand,Model,Source,Drive,Quality of interior,Infotainment system,Comfort,Performance,Handling,Practicality,Reliability,Safety,Quality of construction,Noise,Engine,Price,Overall
0,BMW,1-series,Carwow && Whatcar,Positive,Positive,Positive,Negative,Positive,Positive,Neutral,Not mentioned,Not mentioned,Positive,Positive,Positive,high,Positive
1,BMW,2-Series Active Tourer,CarWow && Whatcar,Neutral,Positive,Negative,Neutral,Neutral,Neutral,Neutral,Not mentioned,Neutral,Positive,Neutral,Neutral,high,Positive
2,BMW,3-series,Carwow && Whatcar,Positive,Positive,Negative,Negative,Positive,Positive,Neutral,Not mentioned,Not mentioned,Positive,Positive,Positive,medium-low,Positive
3,BMW,4-series,Carwow && Whatcar,Positive,Negative,Neutral,Positive,Positive,Positive,Negative,Not mentioned,Not mentioned,Positive,Positive,Medium,High,Positive
4,BMW,5-series,Carwow && Whatcar,Positive,Positive,Positive,Negative,Positive,Positive,Neutral,Not mentioned,Not mentioned,Not mentioned,Not mentioned,Not mentioned,Very high,Positive


In [19]:
# Save the sentiment table to CSV, without the index column
df_new.to_csv("file_txt/auto_review_sentiment.csv", index=False)

In [21]:
# Reload the saved sentiment table from disk and preview it
df_final = pd.read_csv("file_txt/auto_review_sentiment.csv", sep = ",")

df_final.head()

Unnamed: 0,Brand,Model,Source,Drive,Quality of interior,Infotainment system,Comfort,Performance,Handling,Practicality,Reliability,Safety,Quality of construction,Noise,Engine,Price,Overall
0,BMW,1-series,Carwow && Whatcar,Positive,Positive,Positive,Negative,Positive,Positive,Neutral,Not mentioned,Not mentioned,Positive,Negative,Medium,high,Positive
1,BMW,2-series-active-tourer,CarWow && Whatcar,Positive,Negative,Negative,Neutral,Neutral,Neutral,Neutral,Not mentioned,Neutral,Neutral,Neutral,Neutral,Low,Positive
2,BMW,3-series,Carwow && Whatcar,Positive,Positive,Positive,Negative,Positive,Positive,Neutral,Not mentioned,Not mentioned,Positive,Positive,Positive,high,Positive
3,BMW,4-series,Carwow && Whatcar,Positive,Positive,Negative,Negative,Positive,Positive,Neutral,Not mentioned,Not mentioned,Positive,Positive,Negative,high,Positive
4,BMW,5-Series,Carwow && Whatcar,Positive,Positive,Positive,Negative,Positive,Positive,Neutral,Not mentioned,Not mentioned,Positive,Positive,Positive,high,Positive


In [22]:
import sqlite3

# Append the sentiment table to the SQLite database used by the dashboard.
# Every run appends df_final again (if_exists='append'); each inserted row gets
# its own autoincrement id and an inserted_at timestamp from the column defaults.
conn = sqlite3.connect("streamlit-dashboard/src/db/auto_review_sentiment.db")
try:
    cursor = conn.cursor()

    # Create the table if it does not exist, with the same columns as df_final
    # plus the id / timestamp columns. Column names are quoted identifiers, so
    # any embedded double quote is doubled to keep the statement valid.
    columns = ['"{}" TEXT'.format(str(col).replace('"', '""')) for col in df_final.columns]
    columns_sql = ", ".join(columns)
    create_table_sql = f"""
CREATE TABLE IF NOT EXISTS sentiment (
    id INTEGER PRIMARY KEY AUTOINCREMENT,
    inserted_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
    {columns_sql}
);
"""
    cursor.execute(create_table_sql)

    # Load the data into the database
    df_final.to_sql('sentiment', conn, if_exists='append', index=False)

    # Show the tables present in the database
    tables = pd.read_sql_query("SELECT name FROM sqlite_master WHERE type='table';", conn)
    print(tables)
finally:
    # Release the connection even when one of the statements above raises
    conn.close()


              name
0        sentiment
1  sqlite_sequence
