# Working with Columns

## Import Libraries

In [None]:
import os

# findspark.init() must run BEFORE the first pyspark import: it locates the
# Spark installation and adds its Python bindings to sys.path. Importing
# pyspark first only works when pyspark happens to be importable already
# (e.g. pip-installed) and fails with ImportError otherwise.
import findspark

findspark.init()

import pyspark  # noqa: E402

# Display setting
from IPython.core.display import HTML  # noqa: E402
from pyspark import SparkConf  # noqa: E402
from pyspark.sql import SparkSession  # noqa: E402
from pyspark.sql.functions import col, lit, to_timestamp  # noqa: E402

## SparkSession

In [None]:
# Get the active Spark session, or create one running in local mode with a
# default SparkConf if none exists yet.
spark = (
    SparkSession.builder
    .master("local")
    .config(conf=SparkConf())
    .getOrCreate()
)

In [None]:
# Inject CSS that stops <pre> blocks from wrapping in the notebook output
# (presumably so wide DataFrame.show() tables stay aligned).
no_wrap_css = "<style>pre {white-space: pre !important; }</style>"
display(HTML(no_wrap_css))

## Load data

In [None]:
# The dataset is expected under ./data relative to the current working
# directory, addressed with an explicit file:// scheme.
data_path = f"file:///{os.getcwd()}/data"
file_path = f"{data_path}/reported-crimes.csv"

# Read the CSV using its first row as the header.
raw_crimes = spark.read.csv(file_path, header=True)

# Parse the "Date" text column into a timestamp, then keep only rows whose
# timestamp is not later than the 2018-11-11 cutoff.
date_as_timestamp = to_timestamp(col("Date"), "MM/dd/yyyy hh:mm:ss a")
not_after_cutoff = col("Date") <= lit("2018-11-11")

crimes_df = raw_crimes.withColumn("Date", date_as_timestamp).filter(
    not_after_cutoff
)
crimes_df.show(5)

**Display only the first 5 rows of the column named IUCR**

In [None]:
# Variant 1: select the IUCR column by passing its name as a plain string.
crimes_df.select("IUCR").show(5)

In [None]:
# Variant 2: select the IUCR column by referencing it as an attribute of the
# DataFrame.
crimes_df.select(crimes_df.IUCR).show(5)

In [None]:
# Variant 3: select the IUCR column through a column expression built with
# col().
crimes_df.select(col("IUCR")).show(5)

**Display only the first 4 rows of the columns Case Number, Date, and Arrest**

In [None]:
# Several columns can be selected at once by listing their names; a name
# containing a space ("Case Number") is passed as an ordinary string.
crimes_df.select("Case Number", "Date", "Arrest").show(4)

**Add a column named One whose entries are all 1**

In [None]:
# lit(1) produces a constant column holding the literal 1 in every row.
# The result is only displayed, not assigned, so crimes_df itself does not
# gain the "One" column.
crimes_df.withColumn("One", lit(1)).show(5)

**Remove the column IUCR**

In [None]:
# Rebind crimes_df to the result of drop(), so from here on the DataFrame no
# longer has the IUCR column.
# NOTE(review): cells that select IUCR will presumably fail if re-run after
# this one, until crimes_df is reloaded — confirm.
crimes_df = crimes_df.drop("IUCR")
crimes_df.show(5)