In [1]:
from pyspark.sql import SparkSession
from pyspark.sql.functions import *

spark = SparkSession.builder.appName("BMW Sales Analysis").getOrCreate()
df = spark.read.option("header","true").option("inferSchema","true").csv("BMW sales data (2010-2024).csv")

df.show(5)




+--------+----+-------------+-----+---------+------------+-------------+----------+---------+------------+--------------------+
|   Model|Year|       Region|Color|Fuel_Type|Transmission|Engine_Size_L|Mileage_KM|Price_USD|Sales_Volume|Sales_Classification|
+--------+----+-------------+-----+---------+------------+-------------+----------+---------+------------+--------------------+
|5 Series|2016|         Asia|  Red|   Petrol|      Manual|          3.5|    151748|    98740|        8300|                High|
|      i8|2013|North America|  Red|   Hybrid|   Automatic|          1.6|    121671|    79219|        3428|                 Low|
|5 Series|2022|North America| Blue|   Petrol|   Automatic|          4.5|     10991|   113265|        6994|                 Low|
|      X3|2024|  Middle East| Blue|   Petrol|   Automatic|          1.7|     27255|    60971|        4047|                 Low|
|7 Series|2020|South America|Black|   Diesel|      Manual|          2.1|    122131|    49898|        308

In [2]:
df.filter(df.Model == "5 Series").show(10)

+--------+----+-------------+------+---------+------------+-------------+----------+---------+------------+--------------------+
|   Model|Year|       Region| Color|Fuel_Type|Transmission|Engine_Size_L|Mileage_KM|Price_USD|Sales_Volume|Sales_Classification|
+--------+----+-------------+------+---------+------------+-------------+----------+---------+------------+--------------------+
|5 Series|2016|         Asia|   Red|   Petrol|      Manual|          3.5|    151748|    98740|        8300|                High|
|5 Series|2022|North America|  Blue|   Petrol|   Automatic|          4.5|     10991|   113265|        6994|                 Low|
|5 Series|2017|  Middle East|Silver|   Diesel|      Manual|          1.9|    171362|    42926|        1232|                 Low|
|5 Series|2020|       Africa| White| Electric|      Manual|          2.3|    163444|   119486|        4668|                 Low|
|5 Series|2017|North America|Silver| Electric|      Manual|          3.1|     57843|   111326|   

In [7]:
df.groupBy("Model").agg(sum("Price_USD").alias("Toplam Satış")).orderBy(desc("Toplam Satış")).show()

+--------+------------+
|   Model|Toplam Satış|
+--------+------------+
|7 Series|   352610538|
|3 Series|   347226845|
|      i8|   347137044|
|5 Series|   345721780|
|      i3|   345427638|
|      X1|   343948341|
|      X3|   337349726|
|      X5|   335215320|
|      M5|   333498741|
|      X6|   333318141|
|      M3|   330275931|
+--------+------------+



In [13]:
df.withColumn("Vergilenmiş Fiyat", df.Price_USD * 1.3).show(5)

+--------+----+-------------+-----+---------+------------+-------------+----------+---------+------------+--------------------+-----------------+
|   Model|Year|       Region|Color|Fuel_Type|Transmission|Engine_Size_L|Mileage_KM|Price_USD|Sales_Volume|Sales_Classification|Vergilenmiş Fiyat|
+--------+----+-------------+-----+---------+------------+-------------+----------+---------+------------+--------------------+-----------------+
|5 Series|2016|         Asia|  Red|   Petrol|      Manual|          3.5|    151748|    98740|        8300|                High|         128362.0|
|      i8|2013|North America|  Red|   Hybrid|   Automatic|          1.6|    121671|    79219|        3428|                 Low|         102984.7|
|5 Series|2022|North America| Blue|   Petrol|   Automatic|          4.5|     10991|   113265|        6994|                 Low|         147244.5|
|      X3|2024|  Middle East| Blue|   Petrol|   Automatic|          1.7|     27255|    60971|        4047|                 L

In [14]:
df.createOrReplaceTempView("bmw_sales")

spark.sql("SELECT * FROM bmw_sales LIMIT 10").show()


+--------+----+-------------+------+---------+------------+-------------+----------+---------+------------+--------------------+
|   Model|Year|       Region| Color|Fuel_Type|Transmission|Engine_Size_L|Mileage_KM|Price_USD|Sales_Volume|Sales_Classification|
+--------+----+-------------+------+---------+------------+-------------+----------+---------+------------+--------------------+
|5 Series|2016|         Asia|   Red|   Petrol|      Manual|          3.5|    151748|    98740|        8300|                High|
|      i8|2013|North America|   Red|   Hybrid|   Automatic|          1.6|    121671|    79219|        3428|                 Low|
|5 Series|2022|North America|  Blue|   Petrol|   Automatic|          4.5|     10991|   113265|        6994|                 Low|
|      X3|2024|  Middle East|  Blue|   Petrol|   Automatic|          1.7|     27255|    60971|        4047|                 Low|
|7 Series|2020|South America| Black|   Diesel|      Manual|          2.1|    122131|    49898|   

In [15]:
spark.sql("SELECT * FROM bmw_sales WHERE Model = 'X5'").show()

+-----+----+-------------+------+---------+------------+-------------+----------+---------+------------+--------------------+
|Model|Year|       Region| Color|Fuel_Type|Transmission|Engine_Size_L|Mileage_KM|Price_USD|Sales_Volume|Sales_Classification|
+-----+----+-------------+------+---------+------------+-------------+----------+---------+------------+--------------------+
|   X5|2013|         Asia|  Blue| Electric|      Manual|          2.6|    110142|   114844|        5561|                 Low|
|   X5|2012|       Africa|  Blue| Electric|      Manual|          3.8|    142243|    82677|        7104|                High|
|   X5|2021|South America|   Red|   Diesel|      Manual|          2.2|    184981|    47527|        6273|                 Low|
|   X5|2020|  Middle East| White|   Hybrid|   Automatic|          4.9|    132299|    35401|        7110|                High|
|   X5|2021|       Africa|   Red|   Diesel|      Manual|          3.4|     99697|    60278|        2913|              

In [19]:
spark.sql("SELECT * FROM bmw_sales WHERE Engine_Size_L > 3.0").show(20)

+--------+----+-------------+------+---------+------------+-------------+----------+---------+------------+--------------------+
|   Model|Year|       Region| Color|Fuel_Type|Transmission|Engine_Size_L|Mileage_KM|Price_USD|Sales_Volume|Sales_Classification|
+--------+----+-------------+------+---------+------------+-------------+----------+---------+------------+--------------------+
|5 Series|2016|         Asia|   Red|   Petrol|      Manual|          3.5|    151748|    98740|        8300|                High|
|5 Series|2022|North America|  Blue|   Petrol|   Automatic|          4.5|     10991|   113265|        6994|                 Low|
|7 Series|2020|North America|Silver|   Diesel|   Automatic|          3.8|     27403|   100015|        8111|                High|
|5 Series|2017|North America|Silver| Electric|      Manual|          3.1|     57843|   111326|        4880|                 Low|
|      X5|2012|       Africa|  Blue| Electric|      Manual|          3.8|    142243|    82677|   

In [26]:
spark.sql("SELECT Model , SUM(Price_USD) as toplam_satis  FROM bmw_sales GROUP BY Model ").show(10)

+--------+------------+
|   Model|toplam_satis|
+--------+------------+
|      i3|   345427638|
|3 Series|   347226845|
|      X6|   333318141|
|      X1|   343948341|
|7 Series|   352610538|
|      X3|   337349726|
|5 Series|   345721780|
|      M5|   333498741|
|      M3|   330275931|
|      i8|   347137044|
+--------+------------+
only showing top 10 rows


In [28]:
spark.sql("SELECT * FROM bmw_sales WHERE Transmission = 'Manual' and Engine_Size_L > 3.0").show(10)

+--------+----+-------------+------+---------+------------+-------------+----------+---------+------------+--------------------+
|   Model|Year|       Region| Color|Fuel_Type|Transmission|Engine_Size_L|Mileage_KM|Price_USD|Sales_Volume|Sales_Classification|
+--------+----+-------------+------+---------+------------+-------------+----------+---------+------------+--------------------+
|5 Series|2016|         Asia|   Red|   Petrol|      Manual|          3.5|    151748|    98740|        8300|                High|
|5 Series|2017|North America|Silver| Electric|      Manual|          3.1|     57843|   111326|        4880|                 Low|
|      X5|2012|       Africa|  Blue| Electric|      Manual|          3.8|    142243|    82677|        7104|                High|
|      X1|2014|       Africa| White|   Petrol|      Manual|          4.8|     13568|    62941|        1113|                 Low|
|      i8|2021|South America|Silver|   Diesel|      Manual|          4.8|      3188|    64577|   