In [None]:
pip install cassandra-driver pymongo

In [None]:
from cassandra.cluster import Cluster
from pymongo import MongoClient

# Copy the 'cleaned_stock_data' and 'stock_analysis' tables from Cassandra into
# MongoDB collections of the same names. The record lists built here
# (cleaned_stock_data, stock_analysis_data) stay defined for later cells.

# Step 1: Create both clients up front so one try/finally can release them
cluster = Cluster(['127.0.0.1'])  # Replace with your Cassandra IP if needed
client = MongoClient('mongodb://localhost:27017/')

try:
    session = cluster.connect('stock_data_keyspace')

    # Query to fetch cleaned stock data from Cassandra
    query_cleaned_stock = "SELECT * FROM cleaned_stock_data"
    rows_cleaned_stock = session.execute(query_cleaned_stock)

    # Query to fetch stock analysis data from Cassandra
    query_stock_analysis = "SELECT * FROM stock_analysis"
    rows_stock_analysis = session.execute(query_stock_analysis)

    # Step 2: Select the MongoDB database and target collections
    db = client['stock_data_mongodb']  # Use your MongoDB database
    cleaned_stock_collection = db['cleaned_stock_data']  # Collection for cleaned stock data
    stock_analysis_collection = db['stock_analysis']     # Collection for stock analysis

    # Step 3: Transfer cleaned stock data from Cassandra to MongoDB
    cleaned_stock_data = [
        {
            'symbol': row.symbol,
            'timestamp': row.timestamp,
            'price': row.price,
            'volume': row.volume,
        }
        for row in rows_cleaned_stock
    ]

    # Guarded because there is nothing to insert when the table is empty.
    if cleaned_stock_data:
        cleaned_stock_collection.insert_many(cleaned_stock_data)
        print(f"Inserted {len(cleaned_stock_data)} records into MongoDB 'cleaned_stock_data' collection.")

    # Step 4: Transfer stock analysis data from Cassandra to MongoDB
    stock_analysis_data = [
        {
            'symbol': row.symbol,
            'timestamp': row.timestamp,
            'avg_price': row.avg_price,
            'total_volume': row.total_volume,
            'trade_count': row.trade_count,
        }
        for row in rows_stock_analysis
    ]

    if stock_analysis_data:
        stock_analysis_collection.insert_many(stock_analysis_data)
        print(f"Inserted {len(stock_analysis_data)} records into MongoDB 'stock_analysis' collection.")
finally:
    # Close connections even when a query or insert fails. Shutting down the
    # cluster (rather than only the session) also releases the sessions it
    # created, per the driver's Cluster.shutdown() documentation.
    cluster.shutdown()
    client.close()

In [None]:
!pip install matplotlib
pip install seaborn

## Update symbol column

In [None]:
import random

import pandas as pd

# Turn the record lists built by the transfer cell into DataFrames for analysis.
cleaned_stock_data = pd.DataFrame(cleaned_stock_data)
stock_analysis_data = pd.DataFrame(stock_analysis_data)

# Define the symbols list to assign
symbols = ['AAPL', 'AMZN', 'IC MARKETS:1', 'BINANCE:BTCUSDT', 'MSFT']

# Replace the 'symbol' column with random values from the list.
# NOTE: this overwrites whatever symbols were loaded from Cassandra, and the
# assignment differs on every run because the generator is not seeded.
cleaned_stock_data['symbol'] = random.choices(symbols, k=len(cleaned_stock_data))
stock_analysis_data['symbol'] = random.choices(symbols, k=len(stock_analysis_data))

# Display updated DataFrame
print(cleaned_stock_data.head())
print(stock_analysis_data.head())

## Plot price trends over time

In [None]:
import matplotlib.pyplot as plt
import seaborn as sns

# Draw one price-over-time line per symbol on a shared figure.
plt.figure(figsize=(14, 7))
# sort=False keeps symbols in first-appearance order, i.e. the same legend
# order as iterating over unique().
for symbol, subset in cleaned_stock_data.groupby('symbol', sort=False):
    # A line plot joins points in row order, so order each series by time
    # first; otherwise the line doubles back on itself.
    subset = subset.sort_values('timestamp')
    plt.plot(subset['timestamp'], subset['price'], label=symbol)
plt.xlabel('Time')
plt.ylabel('Price')
plt.title('Price Trends Over Time')
plt.legend()
plt.show()

## Distribution Analysis

In [None]:
# Histogram of all prices (30 bins) with a kernel-density curve overlaid.
fig, ax = plt.subplots(figsize=(10, 5))
sns.histplot(cleaned_stock_data['price'], bins=30, kde=True, ax=ax)
ax.set_title('Distribution of Stock Prices')
ax.set_xlabel('Price')
ax.set_ylabel('Frequency')
plt.show()

## Box Plot: Compare the price distributions across different symbols to identify outliers and variations.

In [None]:
# One box per symbol showing the spread of its prices.
fig, ax = plt.subplots(figsize=(12, 6))
sns.boxplot(data=cleaned_stock_data, x='symbol', y='price', ax=ax)
ax.set_title('Price Distribution by Symbol')
ax.set_ylabel('Price')
# Tilt the symbol labels so longer names do not overlap.
plt.xticks(rotation=45)
plt.show()

## Correlation Analysis

In [None]:
# Pairwise correlation heatmap of the numeric columns.
# Restrict to numeric columns first: 'symbol' holds strings, and recent pandas
# versions raise on non-numeric columns in corr() instead of dropping them.
# NOTE(review): columns stored as non-numeric objects would be excluded here;
# confirm the dtypes of 'price' and 'volume'.
numeric_columns = cleaned_stock_data.select_dtypes(include='number')
correlation = numeric_columns.corr()
plt.figure(figsize=(10, 8))
sns.heatmap(correlation, annot=True, cmap='coolwarm', fmt='.2f')
plt.title('Correlation Matrix')
plt.show()

## Bar Plot: Show average prices or total volume by symbol to compare performance

In [None]:
# Mean price per symbol, kept as a two-column frame ('symbol', 'price').
avg_price = cleaned_stock_data.groupby('symbol', as_index=False)['price'].mean()

fig, ax = plt.subplots(figsize=(12, 6))
sns.barplot(data=avg_price, x='symbol', y='price', palette='viridis', ax=ax)
ax.set_title('Average Price by Symbol')
ax.set_ylabel('Average Price')
# Tilt the symbol labels so longer names do not overlap.
plt.xticks(rotation=45)
plt.show()

## Scatter Plot: Plot price vs. volume to identify any relationship between them.

In [None]:
# Scatter of price against volume, with points coloured by symbol.
fig, ax = plt.subplots(figsize=(10, 6))
sns.scatterplot(data=cleaned_stock_data, x='volume', y='price', hue='symbol', ax=ax)
ax.set_title('Price vs. Volume')
ax.set_xlabel('Volume')
ax.set_ylabel('Price')
ax.legend()
plt.show()
