# DAY 07 - Basic DataFrame Operations
YouTube link: https://www.youtube.com/watch?v=02lSlhwLU4c

### Read Some Data into a DataFrame

In [None]:
# Load the property-sales CSV into a DataFrame so there is something to
# explore. The options ask Spark to treat the first row as the header and to
# infer the column types from the data.
df = spark.read.csv(
    'Files/property-sales.csv',
    header=True,
    inferSchema=True,
)

### Ways of Viewing Data
- show (printed representation)
- display
- head

In [None]:
# Print a plain-text, tabular representation of the DataFrame to the cell output
df.show()

In [None]:
# Interactive view of the DataFrame.
# NOTE(review): display() is not imported in this notebook — presumably it is
# supplied by the notebook runtime; confirm for your environment.
display(df)

In [None]:
# Show the first 'n' rows of the DataFrame.
# 'n' has to be defined before it is used; the original cell referenced it
# without assigning it, which raises NameError.
n = 5  # number of rows to preview; adjust as needed
# With n = 5, head(n) returns a list of Row objects rather than a DataFrame.
display(df.head(n))

### Exploring Schema

In [None]:
# Print the DataFrame's schema to the cell output
df.printSchema()

### Get Data Types (not full Schema)

In [None]:
# List each column's name alongside its data type (lighter than the full schema)
df.dtypes

### Actual Schema

In [None]:
# The actual schema object can be accessed using df.schema
df.schema

# The schema of one DataFrame can be reused when reading another file with the
# same layout, instead of explicitly re-declaring every column (especially
# useful when there are many columns).
source_schema = df.schema
# csv() requires the path of the file to read; passing only `schema` raises a
# TypeError. The same sample file is used here — point this at whichever file
# shares the layout. header=True keeps the header row from being read as data.
new_df_with_existing_schema = spark.read.csv(
    'Files/property-sales.csv',
    header=True,
    schema=source_schema,
)

### Column Operations

In [None]:
# List the names of all columns present in the DataFrame
df.columns

In [None]:
# Select a single column by name and print it
single_column_df = df.select('ColumnName')
single_column_df.show()

# Even for a single column, select() returns a DataFrame rather than a Column;
# inspecting the type of the result confirms this
type(single_column_df)

In [None]:
# Rename an existing column. The result of withColumnRenamed is assigned back
# to df, so later cells see the new name.
# NOTE(review): 'OldColumnName' / 'NewColumnName' look like placeholders —
# substitute real column names from the data.
df = df.withColumnRenamed('OldColumnName', 'NewColumnName')
# Print the renamed column to confirm the change
df.select('NewColumnName').show()

In [None]:
# Select a few columns by name and print them.
# show must be *called* — without the parentheses the expression only
# references the bound method and nothing is printed.
df.select(['Column1', 'Column2']).show()

### Adding New Column

In [None]:
# Add a new column to the existing DataFrame: build the column expression
# (the existing column multiplied by 2) first, then attach it as 'NewColumn'.
doubled_values = df['ExistingColumn'] * 2
df = df.withColumn('NewColumn', doubled_values)
df.show()

### Removing Column

In [None]:
# Remove the 'NewColumn' column; the result is assigned back to df
df = df.drop('NewColumn')
# Print the DataFrame to confirm the column is gone
df.show()