In [1]:
from pyspark.sql import SparkSession
from pyspark.sql.types import StructType, StructField, StringType, DoubleType, IntegerType

# Location of the Metro Interstate traffic-volume CSV.
file_path = '/content/Metro_Interstate_Traffic_Volume.csv'

# Start (or reuse) a Spark session for this notebook. Only the application
# name is set; no timeout or other configuration is applied here.
spark = (
    SparkSession.builder
    .appName("Meta Data Table")
    .getOrCreate()
)

# Load the CSV, using the first row as the header and letting Spark infer
# the column types.
# NOTE(review): `df` is not referenced again in the visible part of this
# cell -- confirm the read (and the schema inference) is still needed.
df = spark.read.csv(file_path, inferSchema=True, header=True)

def _describe(name, dtype, purpose, unique, missing="0"):
    """Return one meta-data row ordered like the table columns.

    The row layout is (column name, data type, missing values, purpose,
    unique values/range). Every field is passed through unchanged.
    """
    return (name, dtype, missing, purpose, unique)


# Hand-written description of each dataset column. All fields are strings,
# and every column is recorded with "0" missing values (the helper default).
meta_data = [
    _describe("holiday", "string", "Indicates if the day is a holiday.", "12"),
    _describe("temp", "double", "Temperature in Kelvin.", "5843"),
    _describe("rain_1h", "double", "Amount of rainfall in the last hour.", "372"),
    _describe("snow_1h", "double", "Amount of snow in the last hour.", "12"),
    _describe("clouds_all", "int", "Percentage of cloud coverage.", "60"),
    _describe(
        "weather_main",
        "string",
        "Main weather condition (e.g., clear, rain, etc.).",
        "11",
    ),
    _describe(
        "weather_description",
        "string",
        "Detailed description of the weather condition.",
        "38",
    ),
    _describe("date_time", "string", "Timestamp for the data entry.", "40575"),
    _describe(
        "traffic_volume",
        "int",
        "Traffic volume recorded at the time.",
        "6704",
    ),
]

# Schema of the meta-data table: five nullable string columns, listed in the
# same order as the fields of each meta_data row.
schema = StructType([
    StructField(title, StringType(), True)
    for title in (
        "Column Name",
        "Data Type",
        "Missing Values",
        "Purpose",
        "Unique Values/Range",
    )
])

# Build and display the meta-data table, then release the Spark session.
# The try/finally guarantees the session is stopped even when creating or
# showing the DataFrame raises, so a failed run does not leave it running.
try:
    # Create the DataFrame from the hand-written rows and the string schema
    meta_data_df = spark.createDataFrame(meta_data, schema=schema)

    # Display the meta-data DataFrame without truncating long cell values
    meta_data_df.show(truncate=False)
finally:
    # Stop the Spark session
    spark.stop()

+-------------------+---------+--------------+-------------------------------------------------+-------------------+
|Column Name        |Data Type|Missing Values|Purpose                                          |Unique Values/Range|
+-------------------+---------+--------------+-------------------------------------------------+-------------------+
|holiday            |string   |0             |Indicates if the day is a holiday.               |12                 |
|temp               |double   |0             |Temperature in Kelvin.                           |5843               |
|rain_1h            |double   |0             |Amount of rainfall in the last hour.             |372                |
|snow_1h            |double   |0             |Amount of snow in the last hour.                 |12                 |
|clouds_all         |int      |0             |Percentage of cloud coverage.                    |60                 |
|weather_main       |string   |0             |Main weather condi