# Sentiment Analysis

In [12]:
import PyPDF2
import os
import re
from googletrans import Translator
import nltk
from nltk.sentiment import SentimentIntensityAnalyzer


In [13]:
# Function to extract text from a single PDF file
def extract_text_from_pdf(pdf_path):
    """Return the concatenated text of every page in the PDF at ``pdf_path``.

    Both PyPDF2 API generations are supported: the current names
    (``PdfReader`` / ``pages`` / ``extract_text``) are preferred, and the
    legacy names (``PdfFileReader`` / ``numPages`` / ``getPage`` /
    ``extractText``) are used only when the current ones are missing.

    Args:
        pdf_path: Path of the PDF file to read.

    Returns:
        str: Page texts joined in page order. A page whose extractor
        returns nothing contributes an empty string.
    """
    with open(pdf_path, 'rb') as file:
        # Prefer the modern reader class; fall back for old PyPDF2 releases.
        reader_cls = getattr(PyPDF2, 'PdfReader', None) or PyPDF2.PdfFileReader
        reader = reader_cls(file)

        if hasattr(reader, 'pages'):
            pages = reader.pages
        else:
            pages = [reader.getPage(index) for index in range(reader.numPages)]

        # Pages are read while the file is still open; collect the pieces and
        # join once instead of growing a string with repeated concatenation.
        page_texts = []
        for page in pages:
            extract = getattr(page, 'extract_text', None) or page.extractText
            page_texts.append(extract() or "")
    return "".join(page_texts)

# Helper: split long text into pieces small enough for one translation request
def _split_text_into_chunks(text, max_chars):
    """Split ``text`` on whitespace into chunks of at most ``max_chars`` characters.

    Text that already fits is returned unchanged as a single chunk. Otherwise
    words are re-joined with single spaces, and a word longer than
    ``max_chars`` is cut into ``max_chars``-sized pieces.
    """
    if len(text) <= max_chars:
        return [text]

    chunks = []
    current_words = []
    current_len = 0
    for word in text.split():
        # A single token longer than the limit has to be cut mid-word.
        while len(word) > max_chars:
            if current_words:
                chunks.append(" ".join(current_words))
                current_words, current_len = [], 0
            chunks.append(word[:max_chars])
            word = word[max_chars:]

        separator = 1 if current_words else 0
        if current_words and current_len + separator + len(word) > max_chars:
            chunks.append(" ".join(current_words))
            current_words, current_len = [], 0
            separator = 0
        current_words.append(word)
        current_len += separator + len(word)

    if current_words:
        chunks.append(" ".join(current_words))
    return chunks


# Function to translate text from Russian to English
def translate_text_to_english(text, source_language='ru', max_chars=4500):
    """Translate ``text`` to English, sending it in size-limited chunks.

    A whole PDF's text can exceed what the translation service accepts in a
    single request, so the text is split on whitespace into chunks of at most
    ``max_chars`` characters and the translated chunks are joined with spaces.
    NOTE(review): oversized requests are a plausible cause of the TypeError
    recorded in this notebook's output - confirm against the full traceback.

    Args:
        text: Text to translate; ``None``, empty, or whitespace-only input
            yields ``""`` without contacting the service.
        source_language: Language code of ``text`` (default ``'ru'``).
        max_chars: Upper bound on the size of each request (default 4500).

    Returns:
        str: The English translation, or ``""`` if any request fails (the
        error is printed, matching the original best-effort behavior).
    """
    if not text or not text.strip():  # None, empty, or nothing to translate
        return ""

    max_chars = max(1, int(max_chars))
    translator = Translator()
    translated_parts = []
    try:
        for chunk in _split_text_into_chunks(text, max_chars):
            translation = translator.translate(chunk, src=source_language, dest='en')
            translated_parts.append(translation.text or "")
    except Exception as e:
        print(f"Error during translation: {e}")
        return ""
    return " ".join(translated_parts)


# Function to preprocess the text
def preprocess_text(text):
    """Collapse every run of whitespace in ``text`` into a single space.

    Leading and trailing whitespace is reduced to one space, not removed.
    """
    whitespace_run = re.compile(r'\s+')
    return whitespace_run.sub(' ', text)

In [14]:
# Sentiment analysis function
def analyze_sentiment(text):
    """Classify ``text`` by the compound score from SentimentIntensityAnalyzer.

    Returns:
        tuple: ``(label, compound_score)`` where ``label`` is ``"Positive"``
        for a score of at least 0.05, ``"Negative"`` for a score of at most
        -0.05, and ``"Neutral"`` otherwise.
    """
    compound = SentimentIntensityAnalyzer().polarity_scores(text)['compound']

    if compound >= 0.05:
        label = "Positive"
    elif compound <= -0.05:
        label = "Negative"
    else:
        label = "Neutral"
    return label, compound


# Placeholder for additional analysis function
def perform_additional_analysis(texts):
    """Placeholder hook for further analysis of ``texts``; currently a no-op.

    Returns:
        None
    """
    # Implement additional analysis techniques
    return None

def read_pdfs_from_directory(directory):
    """Extract and translate the text of every PDF file directly inside ``directory``.

    Files are processed in sorted filename order so that the returned list is
    reproducible across runs. The ``.pdf`` extension is matched
    case-insensitively, and entries that are not regular files are skipped.

    Args:
        directory: Path of the folder to scan (not recursive).

    Returns:
        list[str]: One translated text per PDF, in sorted filename order.
    """
    texts = []
    # os.listdir() returns entries in arbitrary order; sort for determinism.
    for filename in sorted(os.listdir(directory)):
        path = os.path.join(directory, filename)
        if not filename.lower().endswith('.pdf') or not os.path.isfile(path):
            continue
        russian_text = extract_text_from_pdf(path)
        print(f"Extracted text from {filename}: {russian_text[:100]}...")  # Print first 100 characters
        english_text = translate_text_to_english(russian_text)
        texts.append(english_text)
    return texts

In [15]:
# Main execution for sentiment analysis
# Pipeline: read + translate every PDF in the folder, normalize whitespace,
# then score each document. All names below stay in the notebook namespace.
# NOTE(review): absolute, machine-specific path (note the "//" segment) -
# confirm the intended location before re-running on another machine.
directory_path = "/home//dev/dev/research/GeospatialAnalysis/library"
texts = read_pdfs_from_directory(directory_path)
processed_texts = [preprocess_text(text) for text in texts]
# Each entry is the (label, compound_score) tuple returned by analyze_sentiment
sentiments = [analyze_sentiment(text) for text in processed_texts]

# Report the sentiment label and compound score of each document
for sentiment in sentiments:
    print(f"Sentiment: {sentiment[0]}, Compound Score: {sentiment[1]}")

TypeError: the JSON object must be str, bytes or bytearray, not NoneType

# Spreadsheet Analysis

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np

In [None]:

def read_and_prepare_data(filepath):
    """Load the Excel workbook at ``filepath`` and return it as read.

    No cleaning or column selection is applied yet; this is the place to add
    preprocessing steps. The file is assumed to have columns like 'Year' and
    'Hours', and maybe others.
    """
    return pd.read_excel(filepath)

def create_visualizations(df):
    """Plot the 'Hours' column of ``df`` against 'Year' as a line chart and show it.

    Returns:
        None
    """
    # Visualization 1: Line plot of average hours per week over the years
    _, ax = plt.subplots(figsize=(10, 6))
    sns.lineplot(data=df, x='Year', y='Hours', ax=ax)
    ax.set_title('Average Hours per Week Over the Years')
    ax.set_xlabel('Year')
    ax.set_ylabel('Average Hours')
    plt.show()

    # Further plots can be added here as the data requires, e.g. one plot
    # per category or group if the sheet contains several.

def perform_statistical_analysis(df):
    """Print descriptive statistics and a correlation matrix for ``df``.

    The correlation is computed over numeric and boolean columns only, so a
    sheet that also contains text columns does not make ``corr()`` fail.

    Args:
        df: The DataFrame to summarize.

    Returns:
        None
    """
    # Descriptive statistics
    summary = df.describe()
    print("Descriptive Statistics:\n", summary)

    # Check for correlation; restrict to numeric/bool columns explicitly
    # rather than relying on the version-dependent default of corr().
    numeric_df = df.select_dtypes(include=['number', 'bool'])
    correlation = numeric_df.corr()
    print("\nCorrelation Matrix:\n", correlation)

    # More complex analyses can be added here, like regression analysis,
    # if relevant to your data


In [None]:
def main(filepath='/mnt/data/Hours per week.xlsx'):
    """Run the spreadsheet analysis: load the workbook, plot it, print statistics.

    Args:
        filepath: Path of the Excel workbook to analyze. Defaults to the
            location this notebook was originally written against.

    Returns:
        None
    """
    df = read_and_prepare_data(filepath)
    create_visualizations(df)
    perform_statistical_analysis(df)

if __name__ == "__main__":
    main()