In [38]:
from azure.storage.blob import BlobServiceClient
from io import BytesIO
import pandas as pd
import os

# Read the storage connection string from the environment so that no secret
# is stored in the notebook itself.
connection_string = os.getenv("AZURE_STORAGE_CONNECTION_STRING")
if not connection_string:
    # os.getenv returns None when the variable is missing; fail here with an
    # actionable message instead of failing inside the SDK's
    # connection-string parser.
    raise RuntimeError(
        "AZURE_STORAGE_CONNECTION_STRING is not set; "
        "define it in the environment before running this notebook."
    )

container_name = "team5container"
blob_path = "Silver/Search Trends/search_us.csv"

# Connect to the storage account and address the source blob.
blob_service_client = BlobServiceClient.from_connection_string(connection_string)
blob_client = blob_service_client.get_blob_client(container=container_name, blob=blob_path)

# Download the blob into an in-memory buffer, then rewind it so pandas reads
# from the first byte.
stream = BytesIO()
blob_client.download_blob().readinto(stream)
stream.seek(0)

# Read CSV into DataFrame
df = pd.read_csv(stream)
df.head()

In [39]:
# Inspect the dtype pandas inferred for each column of the loaded CSV.
df.dtypes

In [40]:
# Shorten the exported column headers to plain identifiers.
# A single in-place rename with one mapping; keys that are absent are ignored
# by pandas' default (errors="ignore"), so re-running the cell is harmless.
column_aliases = {
    'Week': 'date',
    'iphone: (United States)': 'iphone',
    'macbook: (United States)': 'macbook',
    'ipad: (United States)': 'ipad',
}
df.rename(columns=column_aliases, inplace=True)

In [41]:
# Preview the first rows to confirm the renamed column headers.
df.head()

In [42]:
# Summary statistics for the three renamed trend columns.
df.loc[:, ["iphone", "macbook", "ipad"]].describe()


In [43]:
from scipy.stats import zscore

# Columns to standardize.
cols = ['iphone', 'macbook', 'ipad']

# Build a new frame in which each listed column is replaced by its z-score;
# `assign` works on a copy, so `df` keeps the raw values.
df_z = df.assign(**{name: zscore(df[name]) for name in cols})


In [44]:
# Composite score: row-wise mean of the standardized columns listed in `cols`.
df_z["sentiment_score"] = df_z.loc[:, cols].mean(axis="columns")


In [45]:
def classify_sentiment(score, threshold=0.3):
    """Map a numeric score to a 'negative', 'neutral' or 'positive' label.

    Parameters
    ----------
    score : float
        Value to classify.
    threshold : float, optional
        Half-width of the neutral band around zero. Scores strictly below
        ``-threshold`` are 'negative', scores strictly above ``threshold``
        are 'positive'. Defaults to 0.3.

    Returns
    -------
    str
        One of 'negative', 'neutral', 'positive'.

    Raises
    ------
    ValueError
        If ``threshold`` is negative, which would make the negative and
        positive ranges overlap.

    Notes
    -----
    The band edges themselves (``score == ±threshold``) are 'neutral'.
    A NaN score is also labelled 'neutral', because both comparisons
    evaluate to False for NaN.
    """
    if threshold < 0:
        raise ValueError("threshold must be non-negative")

    if score < -threshold:
        return 'negative'
    if score > threshold:
        return 'positive'
    return 'neutral'

# Label every row by passing its composite score through the classifier.
df_z["sentiment"] = df_z["sentiment_score"].map(classify_sentiment)


In [46]:
# Report how many rows received each sentiment label.
sentiment_counts = df_z["sentiment"].value_counts()
print(sentiment_counts)


In [47]:
# Preview the first rows of df_z, including the added sentiment_score and
# sentiment columns.
df_z.head()

In [50]:
import pyarrow as pa
import pyarrow.parquet as pq

# 1. Convert pandas DataFrame to Parquet format in memory
buffer = BytesIO()
table = pa.Table.from_pandas(df_z)
pq.write_table(table, buffer)
buffer.seek(0)  # Reset buffer position so the upload reads from the first byte

# 2. Define the path within your Gold layer in the container
filename = "Gold/Search Trends/trends_with_sentiment.parquet"

# 3. Initialize BlobServiceClient
connection_string = os.getenv("AZURE_STORAGE_CONNECTION_STRING")
if not connection_string:
    # os.getenv returns None when the variable is missing; stop before
    # touching the SDK so the failure message names the actual cause.
    raise RuntimeError(
        "AZURE_STORAGE_CONNECTION_STRING is not set; "
        "define it in the environment before running this notebook."
    )
blob_service_client = BlobServiceClient.from_connection_string(connection_string)
blob_client = blob_service_client.get_blob_client(container="team5container", blob=filename)

# 4. Upload Parquet file to Blob Storage
# overwrite=True replaces any existing blob at the same path.
blob_client.upload_blob(buffer, overwrite=True)
print(f"File uploaded successfully to Blob Storage: {filename}")
