## Project 1

##### Data: Tesla, Inc. Common Stock (TSLA) Historical Quotes (11/03/2015 - 10/31/2025)
##### Source: https://www.nasdaq.com/market-activity/stocks/tsla/historical?page=1&rows_per_page=10&timeline=y5
##### By: Zhengyang Xie (zx2506)

### 1. Import Data

In [1]:
import pandas as pd

# Path of the CSV export, relative to the current working directory
file_path = "./Tesla Stock Data.csv"

# Load the quotes into a DataFrame
df = pd.read_csv(filepath_or_buffer=file_path)

# Preview the first five rows
df.head(5)

Unnamed: 0,Date,Close/Last,Volume,Open,High,Low
0,10/31/2025,$456.56,83135790,$446.75,$458.00,$443.6855
1,10/30/2025,$440.10,72447940,$451.05,$455.0607,$439.61
2,10/29/2025,$461.51,67983540,$462.50,$465.70,$452.65
3,10/28/2025,$460.55,80185670,$454.775,$467.00,$451.60
4,10/27/2025,$452.42,105867500,$439.98,$460.16,$438.69


### 2. Average, Median, and Mode of Tesla’s Historical Opening Prices

#### 2.1 Compute with Pandas

In [2]:
# Turn the '$'-prefixed 'Open' strings into floats by stripping '$' and ','
open_text = df['Open'].replace(r'[\$,]', '', regex=True)
df['Open'] = open_text.astype(float)

# Central-tendency statistics of the opening price
open_col = df['Open']
mean_Open, median_Open = open_col.mean(), open_col.median()
mode_Open = open_col.mode()  # a Series: every value tied for most frequent

# Report the three statistics, one per line
print(
    f"Mean_Open = {mean_Open:.2f}",
    f"Median_Open = {median_Open:.2f}",
    f"Mode_Open = {mode_Open.tolist()}",
    sep="\n",
)

Mean_Open = 142.75
Median_Open = 140.45
Mode_Open = [24.0]


#### 2.2 Compute with the Python Standard Library

In [3]:
import csv

# Collect every non-empty 'Open' price from the CSV as a float
with open("Tesla Stock Data.csv", newline='', encoding='utf-8') as f:
    reader = csv.DictReader(f)  # each row becomes a dict keyed by column name

    # Strip the currency symbol, thousands separators and surrounding spaces
    cleaned_cells = (
        row['Open'].replace('$', '').replace(',', '').strip()
        for row in reader
    )

    # Blank cells are skipped; everything else is parsed as a float
    open_values = [float(cell) for cell in cleaned_cells if cell]

In [4]:
# Mean: the sum of all opening prices divided by how many there are
total_open = sum(open_values)
mean_Open = total_open / len(open_values)

In [5]:
# Median: the middle of the prices once they are in ascending order
sorted_values = list(open_values)
sorted_values.sort()

# Number of values, and where the middle falls
n = len(sorted_values)
mid, is_odd = divmod(n, 2)

# Odd count  -> the single central value
# Even count -> the average of the two central values
median_Open = (
    sorted_values[mid]
    if is_odd
    else (sorted_values[mid - 1] + sorted_values[mid]) / 2
)

In [6]:
# Mode: the value(s) that occur most often
# Tally how many times each distinct opening price appears
counts = {}
for price in open_values:
    counts[price] = counts.get(price, 0) + 1

# The largest tally is the frequency of the mode
max_count = max(counts.values())

# Keep every price that reaches that frequency (ties give several modes),
# in the order the prices were first seen
mode_Open = [price for price, freq in counts.items() if freq == max_count]

In [7]:
# Report the hand-computed statistics, one per line
print(
    f"Mean_Open = {mean_Open:.2f}",
    f"Median_Open = {median_Open:.2f}",
    f"Mode_Open = {mode_Open}",
    sep="\n",
)

Mean_Open = 142.75
Median_Open = 140.45
Mode_Open = [24.0]


### 3. Data Visualization of Tesla’s Open Prices Over the Past Month

In [8]:
# Draw a text-mode bar chart of Tesla's daily opening prices for October 2025.

# 1. Read the dataset
df = pd.read_csv("Tesla Stock Data.csv")

# 2. Clean and convert date and price columns
# Parse 'Date' with an explicit month/day/year format (the export uses values
# such as 10/31/2025); entries that do not match become NaT instead of raising
df['Date'] = pd.to_datetime(df['Date'], format='%m/%d/%Y', errors='coerce')

# Remove '$' and ',' then convert to float
df['Open'] = df['Open'].replace(r'[\$,]', '', regex=True).astype(float)

# Drop rows with invalid dates or prices
df = df.dropna(subset=['Date', 'Open'])

# 3. Keep only the rows dated October 2025 so the data always matches the
#    chart title, instead of relying on a hard-coded row count and on the
#    file being sorted newest-first
in_october_2025 = (df['Date'].dt.year == 2025) & (df['Date'].dt.month == 10)
df = df.loc[in_october_2025]

# 4. Extract data for visualization
dates = df['Date'].dt.strftime('%Y-%m-%d').tolist()   # Format dates as YYYY-MM-DD
opens = df['Open'].tolist()

# Fail with a clear message rather than an opaque max() error on empty data
if not opens:
    raise ValueError("No October 2025 rows found in Tesla Stock Data.csv")

# 5. Compute scaling factor so the longest bar fits in 50 characters
max_value = max(opens)
scale = 50 / max_value

# 6. Print title and axis labels
print("=" * 80)
print("TESLA STOCK OPEN PRICES IN OCTOBER 2025".center(80))
print("=" * 80)
print("Y-axis: Open Price ($)")
print("X-axis: Date (YYYY-MM-DD)\n")

# 7. Draw ASCII bar chart: one row per date, bar length proportional to the
#    price (truncated to whole characters), followed by the exact value
for date, val in zip(dates, opens):
    bar = "█" * int(val * scale)
    print(f"{date} | {bar} {val:.2f}")

                    TESLA STOCK OPEN PRICES IN OCTOBER 2025                     
Y-axis: Open Price ($)
X-axis: Date (YYYY-MM-DD)

2025-10-31 | ███████████████████████████████████████████████ 446.75
2025-10-30 | ███████████████████████████████████████████████ 451.05
2025-10-29 | █████████████████████████████████████████████████ 462.50
2025-10-28 | ████████████████████████████████████████████████ 454.77
2025-10-27 | ██████████████████████████████████████████████ 439.98
2025-10-24 | ███████████████████████████████████████████████ 446.83
2025-10-23 | ████████████████████████████████████████████ 420.00
2025-10-22 | ███████████████████████████████████████████████ 443.45
2025-10-21 | ███████████████████████████████████████████████ 445.75
2025-10-20 | ███████████████████████████████████████████████ 443.87
2025-10-17 | █████████████████████████████████████████████ 425.50
2025-10-16 | ██████████████████████████████████████████████ 434.73
2025-10-15 | ████████████████████████████████████████████