In [1]:
import streamlit as st
import plotly.express as px
import plotly.graph_objs as go
import pandas as pd
from collections import Counter
from datetime import datetime
import statistics
from collections import Counter
from dataclasses import asdict
import numpy as np

In [2]:
# Mock data for testing.
#
# Each entry pairs a scraped review with the result of its emotion analysis:
#   'review'   - 'score', 'title', 'plus', 'min', 'content' and 'date'.
#                'plus' / 'min' hold ';'-separated items (empty string when absent).
#                'date' is either None or a Dutch-formatted string such as
#                '4 december 2024'.
#   'analysis' - 'emotion' (label string) and 'score' (integer).
mock_data = [
    {
        'review': {
            'score': 8.0,
            'title': 'Prima oplader, doet wat hij moet doen!',
            'plus': '',
            'min': '',
            'content': 'Prima oplader, doet wat hij moet doen!',
            'date': None
        },
        'analysis': {
            'emotion': 'happy',
            'score': 8
        }
    },
    {
        'review': {
            'score': 9.3,
            'title': 'Fijne, stabiele smartwatch',
            'plus': 'Zeer uitgebreide smartwatch;Bediening is door de knoppen intuïtief;Suggesties voor hardlopen, slaap en rust',
            'min': 'Nog geen',
            'content': 'De smartwatch voldoet ruimschoots aan de verwachtingen door de ruime mogelijkheden en de afleesbaarheid (zelfs in het donker)',
            'date': '4 december 2024'
        },
        'analysis': {
            'emotion': 'happy',
            'score': 9
        }
    },
    {
        'review': {
            'score': 8.0,
            'title': 'Deze kopen als de stekker geschikt is voor de poortuitgang v',
            'plus': '',
            'min': '',
            'content': 'Goede stekker die goed bij de poortingang past',
            'date': None
        },
        'analysis': {
            'emotion': 'happy',
            'score': 8
        }
    },
    {
        'review': {
            'score': 9.3,
            'title': 'Fijne, stabiele smartwatch',
            'plus': 'Zeer uitgebreide smartwatch;Bediening is door de knoppen intuïtief;Suggesties voor hardlopen, slaap en rust;Goede connectiviteit met apps',
            'min': 'Nog geen',
            'content': 'De smartwatch voldoet ruimschoots aan de verwachtingen door de ruime mogelijkheden en de afleesbaarheid (zelfs in het donker)',
            'date': '4 december 2024'
        },
        'analysis': {
            'emotion': 'happy',
            'score': 9
        }
    },
    {
        'review': {
            'score': 10.0,
            'title': 'fantastische service',
            'plus': '',
            'min': '',
            'content': 'Zeer goed apparaat',
            'date': None
        },
        'analysis': {
            'emotion': 'happy',
            'score': 9
        }
    },
    {
        'review': {
            'score': 10.0,
            'title': 'Fantastisch',
            'plus': 'Prachtige oplader;Laadt snel',
            'min': '',
            'content': 'Meer dan prima oplader',
            'date': '22 september 2024'
        },
        'analysis': {
            'emotion': 'happy',
            'score': 8
        }
    },
    {
        'review': {
            'score': 8.4,
            'title': 'Goed product en razendsnel bezorgd',
            'plus': '',
            'min': '',
            'content': 'Werkt geweldig en vlot',
            'date': None
        },
        'analysis': {
            'emotion': 'happy',
            'score': 9
        }
    },
    {
        'review': {
            'score': 8.0,
            'title': 'Toppy',
            'plus': 'Prima;Blij mee;Oke',
            'min': 'Geen;Geen;Geen',
            'content': 'Geen klachten',
            'date': '19 november 2024'
        },
        'analysis': {
            'emotion': 'happy',
            'score': 8
        }
    }
]


In [3]:
# Select the Plotly renderer used by `fig.show()` for the rest of the session.
import plotly.io as pio
pio.renderers.default = 'notebook'  # Ensure Plotly renders in notebook mode


In [4]:
def plot_emotion_frequency(mock_data):
    """
    Display a bar chart with the number of reviews per emotion.

    Only emotions from a fixed allow-list are counted. Allow-listed emotions
    that never occur are still plotted, with a count of 0. The figure is
    rendered with ``show()``; nothing is returned.

    NOTE: a second function with this same name is defined later in this
    cell and replaces this one once the cell has run.

    Parameters:
    mock_data (list): List of dictionaries containing review and analysis data.
    """
    allowed_emotions = ['angry', 'frustrated', 'sad', 'resentment', 'happy']

    # Tally in order of first appearance so the bar order follows the data.
    tally = Counter()
    for entry in mock_data:
        label = entry['analysis']['emotion']
        if label in allowed_emotions:
            tally[label] += 1

    # Append the allow-listed emotions that were never seen as zero-height bars.
    for label in allowed_emotions:
        tally.setdefault(label, 0)

    # Bar chart: one bar per emotion label.
    frequency_trace = go.Bar(
        x=list(tally.keys()),
        y=list(tally.values()),
        name='Emotion Frequency',
    )
    bar_fig = go.Figure()
    bar_fig.add_trace(frequency_trace)
    bar_fig.update_layout(
        title='Frequency of Emotions',
        xaxis_title='Emotion',
        yaxis_title='Count',
        template='plotly'
    )
    bar_fig.show()

import plotly.express as px
import plotly.graph_objs as go
import pandas as pd
from datetime import datetime

def plot_emotion_frequency(mock_data):
    """
    Creates a Plotly bar chart showing the frequency of emotions.

    Every entry's emotion label is counted. Labels from the fixed
    ``all_emotions`` list that do not occur are appended with a count of 0 so
    they still get a bar. Matching is case-sensitive, so e.g. 'happy' and
    'Happy' are treated as two different labels.

    Parameters:
    mock_data (list): List of dictionaries containing review and analysis data.
        ``entry['analysis']`` may be a dict with an 'emotion' key or an object
        exposing an ``emotion`` attribute.

    Returns:
    plotly.graph_objs._figure.Figure: A Plotly bar chart of emotion frequencies
    """
    def _emotion_of(analysis):
        # The notebook's mock data stores the analysis as a plain dict, while
        # the original code read it as an attribute; accept both shapes.
        if isinstance(analysis, dict):
            return analysis['emotion']
        return analysis.emotion

    # Extract emotions
    emotions = [_emotion_of(entry['analysis']) for entry in mock_data]

    all_emotions = ['Boos', 'Gefrustreerd', 'Afgunst', 'Verdrietig', 'Blij', 'Happy']

    # Observed labels first (most frequent first), then the unseen known labels.
    emotion_counts = pd.Series(emotions).value_counts()
    missing = [emotion for emotion in all_emotions if emotion not in emotion_counts.index]

    # One reindex instead of growing the frame with pd.concat inside a loop.
    emotion_counts = emotion_counts.reindex(list(emotion_counts.index) + missing, fill_value=0)

    emotion_df = pd.DataFrame({'Emotion': emotion_counts.index, 'Count': emotion_counts.values})

    fig = px.bar(emotion_df,
                 x='Emotion',
                 y='Count',
                 title='Frequency of Emotions in Reviews',
                 labels={'Emotion': 'Emotion', 'Count': 'Number of Reviews'},
                 color='Emotion')

    return fig

# Dutch month names and common abbreviations mapped to their month number.
# strptime's %B / %b only recognise the month names of the active locale, so
# Dutch dates such as "12 mei 2024" would otherwise be dropped silently when
# the kernel runs under a non-Dutch locale.
_DUTCH_MONTHS = {
    'januari': 1, 'jan': 1,
    'februari': 2, 'feb': 2,
    'maart': 3, 'mrt': 3,
    'april': 4, 'apr': 4,
    'mei': 5,
    'juni': 6, 'jun': 6,
    'juli': 7, 'jul': 7,
    'augustus': 8, 'aug': 8,
    'september': 9, 'sep': 9, 'sept': 9,
    'oktober': 10, 'okt': 10,
    'november': 11, 'nov': 11,
    'december': 12, 'dec': 12,
}


def _parse_review_date(raw_date, date_formats):
    """Return a datetime for `raw_date`, or None when no known format matches."""
    text = str(raw_date).strip()

    # Locale-independent path for "<day> <dutch month> <4-digit year>".
    parts = text.lower().replace('.', '').split()
    if len(parts) == 3 and parts[0].isdigit() and parts[2].isdigit() and len(parts[2]) == 4:
        month = _DUTCH_MONTHS.get(parts[1])
        if month is not None:
            try:
                return datetime(int(parts[2]), month, int(parts[0]))
            except ValueError:
                # Impossible calendar date (e.g. day 31 in a 30-day month);
                # fall through to the generic formats below.
                pass

    for fmt in date_formats:
        try:
            return datetime.strptime(text, fmt)
        except ValueError:
            continue
    return None


def _analysis_field(analysis, name):
    """Read `name` from an analysis given as a dict or as an attribute object."""
    if isinstance(analysis, dict):
        return analysis[name]
    return getattr(analysis, name)


def plot_emotions_over_time(mock_data):
    """
    Creates a Plotly line chart showing emotions over time.

    Entries without a date, or with a date that cannot be parsed, are left
    out of the chart.

    Parameters:
    mock_data (list): List of dictionaries containing review and analysis data.
        ``entry['analysis']`` may be a dict with 'emotion' and 'score' keys or
        an object exposing ``emotion`` and ``score`` attributes.

    Returns:
    plotly.graph_objs._figure.Figure: A Plotly line chart of emotions over time
        (an empty, titled figure when no entry has a usable date)
    """
    dates_emotions = []

    date_formats = [
        '%d %B %Y',   # "4 december 2024"
        '%d %b %Y',   # "4 dec 2024"
        '%d-%m-%Y',   # "04-12-2024"
        '%Y-%m-%d',   # "2024-12-04"
        '%d/%m/%Y',
    ]

    for entry in mock_data:
        date = entry['review']['date']
        emotion = _analysis_field(entry['analysis'], 'emotion')
        score = _analysis_field(entry['analysis'], 'score')

        if not date:
            continue

        parsed_date = _parse_review_date(date, date_formats)
        if parsed_date is None:
            continue

        dates_emotions.append({
            'Date': parsed_date,
            'Emotion': emotion,
            'Score': score
        })

    # If no valid dates, return an empty figure
    if not dates_emotions:
        fig = go.Figure()
        fig.update_layout(title="No Emotion Data Available")
        return fig

    # Create DataFrame, ordered chronologically so each line runs left to right
    df = pd.DataFrame(dates_emotions)
    df = df.sort_values('Date')

    # Create line chart: one line per emotion label
    fig = px.line(df, x='Date', y='Score', color='Emotion',
                  title='Emotion Scores Over Time',
                  labels={'Score': 'Emotion Score', 'Date': 'Review Date'})

    return fig

# Example usage function
def plot_emotion_analysis(mock_data):
    """
    Build both emotion charts for the given reviews in one call.

    Parameters:
    mock_data (list): List of dictionaries containing review and analysis data.

    Returns:
    tuple: (frequency_chart, time_chart) - the results of
        plot_emotion_frequency and plot_emotions_over_time, in that order.
    """
    # The frequency chart is built first, then the time-based chart.
    return plot_emotion_frequency(mock_data), plot_emotions_over_time(mock_data)

In [None]:
# Example function call for the bar chart: build the emotion-frequency figure
# from the mock data and keep it in `bar_chart`.
bar_chart = plot_emotion_frequency(mock_data)  # This should return a Plotly figure
bar_chart.show()  # Render the figure in a notebook

# Example function call for the line chart: build the emotions-over-time
# figure from the mock data and keep it in `line_chart`.
line_chart = plot_emotions_over_time(mock_data)  # This should return a Plotly figure
line_chart.show()  # Render the figure in a notebook


In [4]:
# def extract_plus_and_min(data):
#     plus_list = []
#     min_list = []
    
#     for item in data:
#         review = item['review']
        
#         # Split and clean plus list
#         if review['plus']:
#             current_plus = [p.strip() for p in review['plus'].split(';') if p.strip()]
#             plus_list.extend(current_plus)
        
#         # Split and clean min list
#         if review['min']:
#             current_min = [m.strip() for m in review['min'].split(';') if m.strip()]
            
#             # If current_min is empty, extend with None or empty string
#             if not current_min:
#                 current_min = ['Geen' for _ in current_plus] if current_plus else ['']
            
#             min_list.extend(current_min)
    
#     # Ensure equal length by padding the shorter list
#     max_length = max(len(plus_list), len(min_list))
    
#     # Pad plus_list if needed
#     if len(plus_list) < max_length:
#         plus_list.extend([''] * (max_length - len(plus_list)))
    
#     # Pad min_list if needed
#     if len(min_list) < max_length:
#         min_list.extend([''] * (max_length - len(min_list)))
    
#     df = pd.DataFrame({
#         'Positief': plus_list,
#         'Negatief': min_list
#     })
    
#     return df

In [None]:
# plus_min_list = extract_plus_and_min(mock_data)
# print(plus_min_list)

In [7]:
def calculate_scores(data):
    """
    Collect the analysis scores of all items and summarise them.

    Items whose 'analysis' value is missing, is not a dictionary, or has no
    'score' key are reported with a printed message and left out.

    Args:
    - data (list): List of dictionaries containing an 'analysis' key with a 'score'.

    Returns:
    - dict: Keys "scores" (list of collected scores), "average" and "median";
      the latter two are None when no score was collected.
    """
    collected = []

    for record in data:
        details = record.get('analysis', None)
        # Guard clause: anything that is not a dict carrying a score is skipped.
        if not isinstance(details, dict) or 'score' not in details:
            print(f"Skipping invalid or missing score in item: {record}")
            continue
        collected.append(details['score'])

    summary = {
        "scores": collected,
        "average": None,
        "median": None
    }

    # Statistics are only defined when at least one score was found.
    if collected:
        summary["average"] = sum(collected) / len(collected)
        summary["median"] = statistics.median(collected)

    return summary


In [None]:
# Summarise the analysis scores of the mock reviews.
result = calculate_scores(mock_data)

# Unpack the dictionary into separate names
scores_list = result['scores']
average = result['average']
median = result['median']

# Print the results: the score list, then the average, then the median
print(scores_list)
print(average)
print(median)


In [9]:
def most_common_emotion(data):
    """
    Determine the most frequently occurring emotion in the 'analysis' field of the dataset.

    Items whose 'analysis' value is missing, is not a dictionary, or has no
    'emotion' key are reported with a printed message and ignored.

    Args:
    - data (list): List of dictionaries with an 'analysis' key that contains an 'emotion'.

    Returns:
    - str: The most common emotion, or None if there are no emotions.
    """
    found = []

    for record in data:
        details = record.get('analysis', None)
        # Guard clause: skip anything that is not a dict carrying an emotion.
        if not isinstance(details, dict) or 'emotion' not in details:
            print(f"Skipping invalid or missing emotion in item: {record}")
            continue
        found.append(details['emotion'])

    # No emotions found
    if len(found) == 0:
        return None

    # Count the frequencies; most_common(1) yields a single (emotion, count) pair.
    (top_emotion, _count), = Counter(found).most_common(1)

    # Only the emotion label is returned, not its count.
    return top_emotion


In [None]:
# Look up the emotion label that occurs most often in the mock data and print it.
most_common_emote = most_common_emotion(mock_data)
print(most_common_emote)

In [1]:
def extract_review_data(data):
    """
    Flatten review/analysis entries into a DataFrame with one row per entry.

    Args:
    - data (list): List of dictionaries, each with a 'review' dict
      ('score', 'title', 'plus', 'min', 'content', 'date') and an 'analysis'
      dict ('emotion', 'score').

    Returns:
    - pd.DataFrame: Columns 'score', 'title', 'plus_list', 'min_list', 'plus',
      'min', 'content', 'date', 'analysis_emotion' and 'analysis_score'.
      'plus_list' / 'min_list' hold the ';'-separated items of 'plus' / 'min'
      as stripped, non-empty strings (an empty list when the field is falsy).
    """
    def _split_items(raw):
        # Falsy fields (empty string, None) yield no items at all.
        if not raw:
            return []
        return [piece.strip() for piece in raw.split(';') if piece.strip()]

    def _flatten(item):
        review = item['review']
        analysis = item['analysis']
        pros = _split_items(review['plus'])
        cons = _split_items(review['min'])
        return {
            'score': review['score'],
            'title': review['title'],
            'plus_list': pros,
            'min_list': cons,
            # The raw, unsplit strings are kept alongside the parsed lists.
            'plus': review['plus'],
            'min': review['min'],
            'content': review['content'],
            'date': review['date'],
            'analysis_emotion': analysis['emotion'],
            'analysis_score': analysis['score']
        }

    return pd.DataFrame([_flatten(item) for item in data])

In [None]:
# Apply the function to the mock data
df = extract_review_data(mock_data)
print(df)  # Prints the full DataFrame


In [None]:
# Scratch frame: 20 rows x 3 columns ("a", "b", "c") of standard-normal samples.
# No seed is set in this cell, so the values differ on every run.
chart_data = pd.DataFrame(np.random.randn(20, 3), columns=["a", "b", "c"])
print(chart_data)

In [5]:

def extract_dates_and_emotions(mock_data):
    """
    Build a DataFrame of (date, emotion) pairs for the reviews that have a date.

    Dates are kept exactly as stored in the input (they are not parsed).
    Entries with a falsy date are left out and duplicate (date, emotion) rows
    are removed; the surviving rows keep their pre-deduplication index labels.

    Parameters:
    mock_data (list): List of dictionaries containing review and analysis data.

    Returns:
    pd.DataFrame: DataFrame with 'Date' and 'Emotion' columns.
    """
    # Read both fields for every entry first, then keep only the dated ones.
    pairs = [(entry['review']['date'], entry['analysis']['emotion']) for entry in mock_data]
    dated_pairs = [(when, label) for when, label in pairs if when]

    frame = pd.DataFrame({
        'Date': [when for when, _ in dated_pairs],
        'Emotion': [label for _, label in dated_pairs]
    })

    return frame.drop_duplicates()

In [6]:
# Show the de-duplicated (date, emotion) pairs found in the mock data.
print(extract_dates_and_emotions(mock_data))

                Date Emotion
0    4 december 2024   happy
2  22 september 2024   happy
3   19 november 2024   happy
