In [None]:
import json
import os
import re
import nltk
from nltk.sentiment.vader import SentimentIntensityAnalyzer

In [None]:
# Locations of the input JSON files.
# Each path can be overridden through an environment variable so the notebook
# runs on other machines without editing this cell; when the variable is unset
# the original hard-coded location is used.
ideas_file_path = os.environ.get(
    "IDEAS_JSON_PATH",
    r"C:\Users\muski\Desktop\NewProject\Part I\ideas.json",
)
comments_file_path = os.environ.get(
    "COMMENTS_JSON_PATH",
    r"C:\Users\muski\Desktop\NewProject\Part I\comments.json",
)

In [None]:
# Read JSON data for ideas
# Decode explicitly instead of relying on the platform's default encoding
# (e.g. cp1252 on Windows), which can corrupt or reject non-ASCII characters.
# "utf-8-sig" reads plain UTF-8 and also tolerates a leading byte-order mark,
# which json.load would otherwise refuse.
with open(ideas_file_path, 'r', encoding='utf-8-sig') as ideas_file:
    ideas_data = json.load(ideas_file)

In [None]:
# Read JSON data for comments
# Decode explicitly instead of relying on the platform's default encoding
# (e.g. cp1252 on Windows), which can corrupt or reject non-ASCII characters.
# "utf-8-sig" reads plain UTF-8 and also tolerates a leading byte-order mark,
# which json.load would otherwise refuse.
with open(comments_file_path, 'r', encoding='utf-8-sig') as comments_file:
    comments_data = json.load(comments_file)

In [None]:
# Show the core fields of every idea, one line per idea
print("Ideas:")
for idea in ideas_data:
    # Pull the four displayed fields out of the record in a single step
    idea_id, title, description, author_id = (
        idea[field] for field in ('id', 'title', 'description', 'authorId')
    )

    print(f"Idea ID: {idea_id}, Title: {title}, Description: {description}, Author ID: {author_id}")


In [None]:
# Show the core fields of every comment, one line per comment
print("\nComments:")
for comment in comments_data:
    # Pull the five displayed fields out of the record in a single step
    comment_id, entity_id, entity_type, author_id, body = (
        comment[field]
        for field in ('id', 'entityId', 'entityType', 'authorId', 'body')
    )

    print(f"Comment ID: {comment_id}, Entity ID: {entity_id}, Entity Type: {entity_type}, Author ID: {author_id}, Body: {body}")

In [None]:
# Initialize the sentiment analyzer
# VADER needs the "vader_lexicon" NLTK resource. On an environment where it has
# never been downloaded the constructor raises LookupError, so fetch the
# lexicon once and retry instead of failing on a fresh kernel.
try:
    sid = SentimentIntensityAnalyzer()
except LookupError:
    nltk.download('vader_lexicon')
    sid = SentimentIntensityAnalyzer()

#### This sentiment analyzer is commonly used for basic sentiment analysis tasks, and it provides a quick way to assess the sentiment of a given piece of text.

In [None]:
# Label each idea description as Positive or Negative from its VADER compound score
print("Sentiment of Idea Descriptions:")
for idea in ideas_data:
    description = idea['description']
    sentiment_scores = sid.polarity_scores(description)
    # NOTE: the split is at zero, so a compound score of exactly 0 is
    # reported as "Positive".
    if sentiment_scores['compound'] >= 0:
        sentiment = "Positive"
    else:
        sentiment = "Negative"
    print(f" ID: {idea['id']}, Sentiment: {sentiment}")

In [None]:
# Label each comment body as Positive or Negative from its VADER compound score
print("\nSentiment of Comments:")
for comment in comments_data:
    body = comment['body']
    sentiment_scores = sid.polarity_scores(body)
    # NOTE: the split is at zero, so a compound score of exactly 0 is
    # reported as "Positive".
    if sentiment_scores['compound'] >= 0:
        sentiment = "Positive"
    else:
        sentiment = "Negative"
    print(f"Comment ID: {comment['id']}, Sentiment: {sentiment}")

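#### The cells above use the VADER (Valence Aware Dictionary and Sentiment Reasoner) sentiment analysis tool to determine the sentiment of both idea descriptions and comments.
The sentiment is classified as "Positive" if the compound score is greater than or equal to 0, and "Negative" otherwise. Note that a compound score of exactly 0 (neutral text, or text containing no words from VADER's lexicon) is therefore also labelled "Positive".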

### Q: Determining the dominant innovation type for each idea in the dataset involves analyzing the content of the idea descriptions and identifying keywords or patterns associated with different innovation types.

In [None]:
# Case-insensitive keyword regexes (Portuguese) that signal each innovation type.
# get_dominant_innovation_type iterates this mapping in insertion order.
innovation_patterns = {
    "Products": r"\b(?:produtos?)\b",  # produto / produtos
    "Services": r"\b(?:serviços?)\b",  # serviço / serviços
    "Business_models": r"\b(?:modelos?\s*de\s*negócios?)\b",  # modelo(s) de negócio(s)
    "Work_practices": r"\b(?:mudança\s*nas\s*práticas\s*de\s*trabalho)\b",
    "Marketing": r"\b(?:comercialização)\b",
    "Cost_savings": r"\b(?:economia\s*de\s*custos)\b",
}


In [None]:
# Function to determine the dominant innovation type for an idea
def get_dominant_innovation_type(description, patterns=None):
    """Return the innovation type whose keywords occur most often in a description.

    Every pattern is matched case-insensitively against ``description`` and
    its occurrences are counted. The type with the highest count wins; on a
    tie the type that appears earlier in the mapping is kept.

    Args:
        description: Idea description text. ``None`` or an empty string is
            treated as containing no keywords.
        patterns: Optional mapping of innovation type name to regex string.
            Defaults to the module-level ``innovation_patterns``.

    Returns:
        The name of the dominant innovation type, or ``'Unknown'`` when no
        pattern matches.
    """
    if patterns is None:
        patterns = innovation_patterns
    if not description:
        return 'Unknown'

    dominant_type, highest_count = 'Unknown', 0
    for innovation_type, pattern in patterns.items():
        match_count = len(re.findall(pattern, description, flags=re.IGNORECASE))
        # Strict '>' keeps the earlier entry when two types tie.
        if match_count > highest_count:
            dominant_type, highest_count = innovation_type, match_count
    return dominant_type

In [None]:
# Report the innovation type detected for every idea
print("Dominant Innovation Type for Each Idea:")
for idea in ideas_data:
    idea_id, description = idea['id'], idea['description']
    # Classification is delegated to the keyword matcher
    dominant_type = get_dominant_innovation_type(description)
    print(f"Idea ID: {idea_id}, Dominant Innovation Type: {dominant_type}")

##### Use the function `get_dominant_innovation_type(description)` on any idea description to get its dominant innovation type.
