In [0]:
from pyspark.sql.types import StructType, StructField, IntegerType, StringType, DoubleType

In [0]:
# Explicit schema for the circuits CSV, assembled one column at a time.
# Only circuitId is declared non-nullable; every other column allows nulls.
circuits_schema = (
    StructType()
    .add("circuitId", IntegerType(), False)
    .add("circuitRef", StringType(), True)
    .add("name", StringType(), True)
    .add("location", StringType(), True)
    .add("country", StringType(), True)
    .add("lat", DoubleType(), True)
    .add("lng", DoubleType(), True)
    .add("alt", IntegerType(), True)
    .add("url", StringType(), True)
)

In [0]:
# Read the raw circuits CSV with the header option enabled, applying the
# explicit circuits_schema rather than letting Spark infer column types.
circuits_df = (
    spark.read
    .option("header", True)
    .schema(circuits_schema)
    .csv('dbfs:/mnt/formula19533dl/raw/circuits.csv')
)

### Select only the required columns

#### Method - 1 - `df.select("coln1", "coln2", ...)`

In [0]:
# Method 1: pass plain column-name strings to select(); "url" is left out.
circuits_selected_df = circuits_df.select(
    "circuitId",
    "circuitRef",
    "name",
    "location",
    "country",
    "lat",
    "lng",
    "alt",
)

#### Method - 2 - `df.select(df.coln1, df.coln2, ...)`

In [0]:
# Method 2: reference each column as an attribute of the DataFrame.
circuits_selected_df = circuits_df.select(
    circuits_df.circuitId,
    circuits_df.circuitRef,
    circuits_df.name,
    circuits_df.location,
    circuits_df.country,
    circuits_df.lat,
    circuits_df.lng,
    circuits_df.alt,
)

#### Method - 3 - `df.select(df['coln1'], df['coln2'], ...)`

In [0]:
# Method 3: look each column up on the DataFrame with bracket indexing.
circuits_selected_df = circuits_df.select(
    circuits_df['circuitId'],
    circuits_df['circuitRef'],
    circuits_df['name'],
    circuits_df['location'],
    circuits_df['country'],
    circuits_df['lat'],
    circuits_df['lng'],
    circuits_df['alt'],
)

#### Method - 4 - Import `pyspark.sql.functions` and use `col()` function

In [0]:
from pyspark.sql.functions import col

# Method 4: build each column reference with the col() function.
circuits_selected_df = circuits_df.select(
    col("circuitId"),
    col("circuitRef"),
    col("name"),
    col("location"),
    col("country"),
    col("lat"),
    col("lng"),
    col("alt"),
)

In [0]:
# Show the current circuits_selected_df so the selected columns can be inspected.
display(circuits_selected_df)

In [0]:
# Column objects built with col() can be renamed inline via .alias().
# Here circuitId, circuitRef, lat and lng get new names; the remaining
# columns keep their original names.
circuits_selected_df = circuits_df.select(
    col("circuitId").alias("circuit_id"),
    col("circuitRef").alias("circuit_ref"),
    col("name"),
    col("location"),
    col("country"),
    col("lat").alias("latitude"),
    col("lng").alias("longitude"),
    col("alt"),
)

In [0]:
# Show circuits_selected_df again to check the aliased column names.
display(circuits_selected_df)