# READ CSV FILE

## List files

In [0]:
# List the files stored under the fantasy tables directory in DBFS.
dbutils.fs.ls("dbfs:/FileStore/tables/fantasy/")

Out[1]: [FileInfo(path='dbfs:/FileStore/tables/fantasy/characters.csv', name='characters.csv', size=1123, modificationTime=1723331905000),
 FileInfo(path='dbfs:/FileStore/tables/fantasy/inventory.csv', name='inventory.csv', size=1122, modificationTime=1723331932000),
 FileInfo(path='dbfs:/FileStore/tables/fantasy/items.csv', name='items.csv', size=1307, modificationTime=1723331936000)]

## Read CSV file with inferred schema

In [0]:
# Load characters.csv as a DataFrame: the first line supplies the column
# names and Spark infers each column's type from the data.
df_characters = (
    spark.read
    .format("csv")
    .option("inferschema", True)  # infer column types instead of reading everything as string
    .option("header", True)       # first row holds the column names
    .option("sep", ",")           # comma-separated fields
    .load('dbfs:/FileStore/tables/fantasy/characters.csv')
)

In [0]:
# Print the first rows of the characters DataFrame as a text table.
df_characters.show()

+---+---------+----------+-------+-----+----------+------+--------+-------------------+---------+
| id|     name|     guild|  class|level|experience|health|is_alive|        last_active|mentor_id|
+---+---------+----------+-------+-----+----------+------+--------+-------------------+---------+
|  1|  Aragorn|   Rangers|Warrior|   25|      9000| 120.5|    true|2023-09-20 08:00:00|     null|
|  2|  Legolas|  Mirkwood| Archer|   22|      7500|100.75|    true|2023-09-21 09:00:00|     null|
|  3|    Gimli|    Erebor|Warrior|   20|      6000|130.25|    true|2023-09-22 07:00:00|     null|
|  4|    Frodo| Shirefolk| Hobbit|   12|      2500|  50.5|   false|2023-09-23 10:00:00|        1|
|  5|      Sam| Shirefolk| Hobbit|   13|      2750|  52.5|    true|2023-09-23 10:05:00|        1|
|  6|  Gandalf|    Istari|   Mage|   30|     10000|  90.0|    true|2023-09-20 11:00:00|     null|
|  7|  Boromir|    Gondor|Warrior|   18|      5500| 115.5|   false|2023-09-21 12:00:00|     null|
|  8|    Merry| Shir

In [0]:
# Print each column's name, inferred type and nullability.
df_characters.printSchema()

root
 |-- id: integer (nullable = true)
 |-- name: string (nullable = true)
 |-- guild: string (nullable = true)
 |-- class: string (nullable = true)
 |-- level: integer (nullable = true)
 |-- experience: integer (nullable = true)
 |-- health: double (nullable = true)
 |-- is_alive: boolean (nullable = true)
 |-- last_active: timestamp (nullable = true)
 |-- mentor_id: integer (nullable = true)



In [0]:
# Load inventory.csv, passing all reader settings in a single options() call
# (keyword form of the chained .option() calls).
df_inventory = (
    spark.read
    .format("csv")
    .options(
        inferschema="True",  # infer column types from the data
        header="True",       # first row holds the column names
        sep=",",             # comma-separated fields
    )
    .load('dbfs:/FileStore/tables/fantasy/inventory.csv')
)

In [0]:
# Print each column's name, inferred type and nullability.
df_inventory.printSchema()

root
 |-- id: integer (nullable = true)
 |-- character_id: integer (nullable = true)
 |-- item_id: integer (nullable = true)
 |-- quantity: integer (nullable = true)
 |-- is_equipped: boolean (nullable = true)
 |-- purchase_date: timestamp (nullable = true)
 |-- expiry_date: timestamp (nullable = true)
 |-- value: integer (nullable = true)



## Read CSV file with an explicit schema

In [0]:
from pyspark.sql.types import StructType, StructField, IntegerType, StringType, DoubleType, TimestampType, BooleanType

In [0]:
# Explicit schema for items.csv: one entry per column, in file order,
# each declared nullable.
schema_items = (
    StructType()
    .add("id", IntegerType(), True)
    .add("name", StringType(), True)
    .add("item_type", StringType(), True)
    .add("power", IntegerType(), True)
    .add("weight", DoubleType(), True)
    .add("rarity", StringType(), True)
    .add("date_added", TimestampType(), True)
)

In [0]:
# Load items.csv using the predefined schema_items rather than inferring
# column types from the data.
df_items = (
    spark.read
    .format("csv")
    .schema(schema_items)    # column names and types come from schema_items
    .option("header", True)  # first row is a header line, not data
    .option("sep", ",")      # comma-separated fields
    .load('dbfs:/FileStore/tables/fantasy/items.csv')
)

In [0]:
# Print each column's name, type and nullability as applied from schema_items.
df_items.printSchema()

root
 |-- id: integer (nullable = true)
 |-- name: string (nullable = true)
 |-- item_type: string (nullable = true)
 |-- power: integer (nullable = true)
 |-- weight: double (nullable = true)
 |-- rarity: string (nullable = true)
 |-- date_added: timestamp (nullable = true)

