In [1]:
from pyspark.sql import SparkSession
from pyspark.sql.functions import col, to_date
import os

# Step 1: Initialize Spark Session
# getOrCreate() returns the already-running session if one exists, so
# re-running this cell does not start a second session.
spark = (
    SparkSession.builder
    .appName("AMD Stock Analysis")
    .getOrCreate()
)
# NOTE(review): `_jsc` is a private PySpark attribute. Presumably this setting
# works around native Hadoop library problems in this runtime -- confirm it is
# still required before relying on it.
spark._jsc.hadoopConfiguration().set("hadoop.native.lib", "false")


# Step 2: Load Data
# The first row is treated as the header; column types are inferred by Spark.
file_path = '/content/World-Stock-Prices-Dataset.csv'  # Replace with actual path
df = spark.read.option("header", True).option("inferSchema", True).csv(file_path)

# Steps 3-5: keep only AMD rows, drop the unneeded "Capital Gains" column,
# convert "Date" to a date type and sort chronologically.
amd_df = (
    df.filter(col("Ticker") == "AMD")
    .drop("Capital Gains")
    .withColumn("Date", to_date("Date"))
    .orderBy("Date")
)

# Step 6: Save to CSV (for Tableau)
# Spark's CSV writer creates a *directory* at `output_path` containing part
# files; coalesce(1) reduces the data to one partition so a single part file
# is written. mode("overwrite") replaces any previous output at that path.
output_path = "/content/AMD.csv"
amd_df.coalesce(1).write.option("header", True).mode("overwrite").csv(output_path)

# Optional: Show sample
amd_df.show(5)

+----------+--------+--------+--------+------+--------+---------+------------+----------+------+------------+-------+
|      Date|    Open|    High|     Low| Close|  Volume|Dividends|Stock Splits|Brand_Name|Ticker|Industry_Tag|Country|
+----------+--------+--------+--------+------+--------+---------+------------+----------+------+------------+-------+
|2000-01-03|14.96875|15.59375| 14.6875|  15.5| 7843200|      0.0|         0.0|       amd|   AMD|  technology|    usa|
|2000-01-04|  15.125|    15.5|14.59375|14.625| 6290200|      0.0|         0.0|       amd|   AMD|  technology|    usa|
|2000-01-05|14.53125| 15.0625|    14.0|  15.0| 8204600|      0.0|         0.0|       amd|   AMD|  technology|    usa|
|2000-01-06|    15.5|    16.0|   15.25|  16.0|11489400|      0.0|         0.0|       amd|   AMD|  technology|    usa|
|2000-01-07|15.40625|16.40625|  15.375| 16.25| 8543400|      0.0|         0.0|       amd|   AMD|  technology|    usa|
+----------+--------+--------+--------+------+--------+-