<a href="https://colab.research.google.com/github/Tharunbatheni/repositary-1/blob/main/PysparkDataFrame_Basics.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [2]:

# ============================================================================
# SECTION 1: INSTALLATION AND SETUP FOR GOOGLE COLAB
# ============================================================================

# Install PySpark and required dependencies
!pip install pyspark findspark

# Import necessary libraries
import findspark
findspark.init()




In [3]:

from pyspark.sql import SparkSession
from pyspark.sql.functions import col, when, avg, count, sum, max, min
from pyspark.sql.types import StructType, StructField, StringType, IntegerType, FloatType
import pandas as pd


In [4]:

# Initialize Spark Session
spark = SparkSession.builder \
    .appName("PySpark Basics Tutorial") \
    .master("local[*]") \
    .config("spark.sql.adaptive.enabled", "true") \
    .config("spark.sql.adaptive.coalescePartitions.enabled", "true") \
    .getOrCreate()

print("Spark Session Created Successfully!")
print(f"Spark Version: {spark.version}")


Spark Session Created Successfully!
Spark Version: 3.5.1


In [5]:


# ============================================================================
# SECTION 2: CREATING DATAFRAMES
# ============================================================================

# Method 1: Create DataFrame from Python list
print("\n=== Creating DataFrame from List ===")
data = [
    ("Alice", 25, "Engineer", 75000),
    ("Bob", 30, "Manager", 85000),
    ("Charlie", 35, "Analyst", 65000),
    ("Diana", 28, "Designer", 70000),
    ("Eve", 32, "Engineer", 80000)
]

columns = ["Name", "Age", "Job", "Salary"]
df = spark.createDataFrame(data, columns)
df



=== Creating DataFrame from List ===


DataFrame[Name: string, Age: bigint, Job: string, Salary: bigint]

In [6]:
df.show()  # action

+-------+---+--------+------+
|   Name|Age|     Job|Salary|
+-------+---+--------+------+
|  Alice| 25|Engineer| 75000|
|    Bob| 30| Manager| 85000|
|Charlie| 35| Analyst| 65000|
|  Diana| 28|Designer| 70000|
|    Eve| 32|Engineer| 80000|
+-------+---+--------+------+



In [7]:

# Method 2: Create DataFrame with explicit schema
print("\n=== Creating DataFrame with Schema ===")
schema = StructType([
    StructField("Name", StringType(), True),
    StructField("Age", IntegerType(), True),
    StructField("Job", StringType(), True),
    StructField("Salary", FloatType(), True)
])
columns = ["Name", "Age", "Job", "Salary"]

df_schema = spark.createDataFrame(data, schema=columns)
df_schema.printSchema()



=== Creating DataFrame with Schema ===
root
 |-- Name: string (nullable = true)
 |-- Age: long (nullable = true)
 |-- Job: string (nullable = true)
 |-- Salary: long (nullable = true)



In [8]:
df_schema.show()

+-------+---+--------+------+
|   Name|Age|     Job|Salary|
+-------+---+--------+------+
|  Alice| 25|Engineer| 75000|
|    Bob| 30| Manager| 85000|
|Charlie| 35| Analyst| 65000|
|  Diana| 28|Designer| 70000|
|    Eve| 32|Engineer| 80000|
+-------+---+--------+------+



In [9]:

# Method 3: Create DataFrame from Pandas DataFrame
print("\n=== Creating DataFrame from Pandas ===")
pandas_df = pd.DataFrame({
    'Product': ['Laptop', 'Mouse', 'Keyboard', 'Monitor', 'Headphones'],
    'Price': [999.99, 25.50, 75.00, 299.99, 150.00],
    'Category': ['Electronics', 'Accessories', 'Accessories', 'Electronics', 'Audio'],
    'Stock': [50, 200, 150, 30, 75]
})
pandas_df


=== Creating DataFrame from Pandas ===


Unnamed: 0,Product,Price,Category,Stock
0,Laptop,999.99,Electronics,50
1,Mouse,25.5,Accessories,200
2,Keyboard,75.0,Accessories,150
3,Monitor,299.99,Electronics,30
4,Headphones,150.0,Audio,75


In [10]:

products_df = spark.createDataFrame(pandas_df)
products_df


DataFrame[Product: string, Price: double, Category: string, Stock: bigint]

In [11]:
products_df.show()

+----------+------+-----------+-----+
|   Product| Price|   Category|Stock|
+----------+------+-----------+-----+
|    Laptop|999.99|Electronics|   50|
|     Mouse|  25.5|Accessories|  200|
|  Keyboard|  75.0|Accessories|  150|
|   Monitor|299.99|Electronics|   30|
|Headphones| 150.0|      Audio|   75|
+----------+------+-----------+-----+



In [12]:

# ============================================================================
# SECTION 3: BASIC DATAFRAME OPERATIONS
# ============================================================================

print("\n=== Basic DataFrame Info ===")
print(f"Number of rows: {df.count()}")
print(f"Number of columns: {len(df.columns)}")
print(f"Column names: {df.columns}")



=== Basic DataFrame Info ===
Number of rows: 5
Number of columns: 4
Column names: ['Name', 'Age', 'Job', 'Salary']


In [48]:
df.select('*').show()

+-------+---+--------+------+
|   Name|Age|     Job|Salary|
+-------+---+--------+------+
|  Alice| 25|Engineer| 75000|
|    Bob| 30| Manager| 85000|
|Charlie| 35| Analyst| 65000|
|  Diana| 28|Designer| 70000|
|    Eve| 32|Engineer| 80000|
+-------+---+--------+------+



In [13]:
# df.Name
# df["Name"]

In [14]:
df.show()


+-------+---+--------+------+
|   Name|Age|     Job|Salary|
+-------+---+--------+------+
|  Alice| 25|Engineer| 75000|
|    Bob| 30| Manager| 85000|
|Charlie| 35| Analyst| 65000|
|  Diana| 28|Designer| 70000|
|    Eve| 32|Engineer| 80000|
+-------+---+--------+------+



In [51]:
df.select(df.Name,(df.Salary * 1.5).alias("incresed_salary")).show()

+-------+---------------+
|   Name|incresed_salary|
+-------+---------------+
|  Alice|       112500.0|
|    Bob|       127500.0|
|Charlie|        97500.0|
|  Diana|       105000.0|
|    Eve|       120000.0|
+-------+---------------+



In [15]:

# Select specific columns
print("\n=== Selecting Columns ===")
df.select("Name", "Job").show()



=== Selecting Columns ===
+-------+--------+
|   Name|     Job|
+-------+--------+
|  Alice|Engineer|
|    Bob| Manager|
|Charlie| Analyst|
|  Diana|Designer|
|    Eve|Engineer|
+-------+--------+



In [60]:
people=df.show()

+-------+---+--------+------+
|   Name|Age|     Job|Salary|
+-------+---+--------+------+
|  Alice| 25|Engineer| 75000|
|    Bob| 30| Manager| 85000|
|Charlie| 35| Analyst| 65000|
|  Diana| 28|Designer| 70000|
|    Eve| 32|Engineer| 80000|
+-------+---+--------+------+



In [62]:
df.na.fill(0).show()

+-------+---+--------+------+
|   Name|Age|     Job|Salary|
+-------+---+--------+------+
|  Alice| 25|Engineer| 75000|
|    Bob| 30| Manager| 85000|
|Charlie| 35| Analyst| 65000|
|  Diana| 28|Designer| 70000|
|    Eve| 32|Engineer| 80000|
+-------+---+--------+------+



In [16]:

# Filter operations
print("\n=== Filtering Data ===")
df.filter(col("Age") > 30).show()



=== Filtering Data ===
+-------+---+--------+------+
|   Name|Age|     Job|Salary|
+-------+---+--------+------+
|Charlie| 35| Analyst| 65000|
|    Eve| 32|Engineer| 80000|
+-------+---+--------+------+



In [17]:
df.filter(df.Age > 30).show()

+-------+---+--------+------+
|   Name|Age|     Job|Salary|
+-------+---+--------+------+
|Charlie| 35| Analyst| 65000|
|    Eve| 32|Engineer| 80000|
+-------+---+--------+------+



In [18]:

df.filter(df.Salary > 70000).show()


+-----+---+--------+------+
| Name|Age|     Job|Salary|
+-----+---+--------+------+
|Alice| 25|Engineer| 75000|
|  Bob| 30| Manager| 85000|
|  Eve| 32|Engineer| 80000|
+-----+---+--------+------+



In [52]:
df.filter((df.Age > 30) | (df.Salary > 70000)).show()

+-------+---+--------+------+
|   Name|Age|     Job|Salary|
+-------+---+--------+------+
|  Alice| 25|Engineer| 75000|
|    Bob| 30| Manager| 85000|
|Charlie| 35| Analyst| 65000|
|    Eve| 32|Engineer| 80000|
+-------+---+--------+------+



In [53]:
df.filter((df.Name.isin("Alice","Bob"))).show()

+-----+---+--------+------+
| Name|Age|     Job|Salary|
+-----+---+--------+------+
|Alice| 25|Engineer| 75000|
|  Bob| 30| Manager| 85000|
+-----+---+--------+------+



In [54]:
df.filter((df.Name.like("A%"))).show()

+-----+---+--------+------+
| Name|Age|     Job|Salary|
+-----+---+--------+------+
|Alice| 25|Engineer| 75000|
+-----+---+--------+------+



In [55]:
df.filter((df.Name.like("B%"))).show()

+----+---+-------+------+
|Name|Age|    Job|Salary|
+----+---+-------+------+
| Bob| 30|Manager| 85000|
+----+---+-------+------+



In [57]:
df.filter((df.Age.between(25,30))).show()

+-----+---+--------+------+
| Name|Age|     Job|Salary|
+-----+---+--------+------+
|Alice| 25|Engineer| 75000|
|  Bob| 30| Manager| 85000|
|Diana| 28|Designer| 70000|
+-----+---+--------+------+



In [19]:

# Multiple conditions
print("\n=== Multiple Conditions ===")
df.filter((col("Age") > 25) & (col("Salary") > 70000)).show()



=== Multiple Conditions ===
+----+---+--------+------+
|Name|Age|     Job|Salary|
+----+---+--------+------+
| Bob| 30| Manager| 85000|
| Eve| 32|Engineer| 80000|
+----+---+--------+------+



In [20]:

# Add new columns
print("\n=== Adding New Columns ===")
df_with_bonus = df.withColumn("Bonus", col("Salary") * 0.1)
df_with_bonus.show()




=== Adding New Columns ===
+-------+---+--------+------+------+
|   Name|Age|     Job|Salary| Bonus|
+-------+---+--------+------+------+
|  Alice| 25|Engineer| 75000|7500.0|
|    Bob| 30| Manager| 85000|8500.0|
|Charlie| 35| Analyst| 65000|6500.0|
|  Diana| 28|Designer| 70000|7000.0|
|    Eve| 32|Engineer| 80000|8000.0|
+-------+---+--------+------+------+



In [21]:


# Conditional columns
print("\n=== Conditional Columns ===")
df_with_category = df.withColumn(
    "Salary_Category",
    when(col("Salary") > 80000, "High")
    .when(col("Salary") > 70000, "Medium")
    .otherwise("Low")
)
df_with_category.show()




=== Conditional Columns ===
+-------+---+--------+------+---------------+
|   Name|Age|     Job|Salary|Salary_Category|
+-------+---+--------+------+---------------+
|  Alice| 25|Engineer| 75000|         Medium|
|    Bob| 30| Manager| 85000|           High|
|Charlie| 35| Analyst| 65000|            Low|
|  Diana| 28|Designer| 70000|            Low|
|    Eve| 32|Engineer| 80000|         Medium|
+-------+---+--------+------+---------------+



In [64]:
df.withColumn("Bonus", (df.Salary * 0.1)).show()

+-------+---+--------+------+------+
|   Name|Age|     Job|Salary| Bonus|
+-------+---+--------+------+------+
|  Alice| 25|Engineer| 75000|7500.0|
|    Bob| 30| Manager| 85000|8500.0|
|Charlie| 35| Analyst| 65000|6500.0|
|  Diana| 28|Designer| 70000|7000.0|
|    Eve| 32|Engineer| 80000|8000.0|
+-------+---+--------+------+------+



In [22]:


# ============================================================================
# SECTION 4: AGGREGATIONS AND GROUPING
# ============================================================================

print("\n=== Basic Aggregations ===")
df.agg(
    avg("Age").alias("Average_Age"),
    avg("Salary").alias("Average_Salary"),
    count("*").alias("Total_Records")
).show()



=== Basic Aggregations ===
+-----------+--------------+-------------+
|Average_Age|Average_Salary|Total_Records|
+-----------+--------------+-------------+
|       30.0|       75000.0|            5|
+-----------+--------------+-------------+



In [23]:
df.agg(
    avg("Age"),
    avg("Salary").alias("Average_Salary"),
    count("*").alias("Total_Records")
).show()

+--------+--------------+-------------+
|avg(Age)|Average_Salary|Total_Records|
+--------+--------------+-------------+
|    30.0|       75000.0|            5|
+--------+--------------+-------------+



In [24]:

print("\n=== Group By Operations ===")
df.groupBy("Job").agg(
    count("*").alias("Count"),
    avg("Salary").alias("Avg_Salary"),
    max("Age").alias("Max_Age")
).show()



=== Group By Operations ===
+--------+-----+----------+-------+
|     Job|Count|Avg_Salary|Max_Age|
+--------+-----+----------+-------+
|Engineer|    2|   77500.0|     32|
| Manager|    1|   85000.0|     30|
|Designer|    1|   70000.0|     28|
| Analyst|    1|   65000.0|     35|
+--------+-----+----------+-------+



In [68]:
df.orderBy(df.Salary.desc()).show()

+-------+---+--------+------+
|   Name|Age|     Job|Salary|
+-------+---+--------+------+
|    Bob| 30| Manager| 85000|
|    Eve| 32|Engineer| 80000|
|  Alice| 25|Engineer| 75000|
|  Diana| 28|Designer| 70000|
|Charlie| 35| Analyst| 65000|
+-------+---+--------+------+



In [25]:

# ============================================================================
# SECTION 5: DATA MANIPULATION
# ============================================================================

print("\n=== Sorting Data ===")
df.orderBy(col("Salary").desc()).show()



=== Sorting Data ===
+-------+---+--------+------+
|   Name|Age|     Job|Salary|
+-------+---+--------+------+
|    Bob| 30| Manager| 85000|
|    Eve| 32|Engineer| 80000|
|  Alice| 25|Engineer| 75000|
|  Diana| 28|Designer| 70000|
|Charlie| 35| Analyst| 65000|
+-------+---+--------+------+



In [26]:

# Rename columns
print("\n=== Renaming Columns ===")
df_renamed = df.withColumnRenamed("Job", "Position")
df_renamed.show()



=== Renaming Columns ===
+-------+---+--------+------+
|   Name|Age|Position|Salary|
+-------+---+--------+------+
|  Alice| 25|Engineer| 75000|
|    Bob| 30| Manager| 85000|
|Charlie| 35| Analyst| 65000|
|  Diana| 28|Designer| 70000|
|    Eve| 32|Engineer| 80000|
+-------+---+--------+------+



In [27]:

# Drop columns
print("\n=== Dropping Columns ===")
df_dropped = df.drop("Age")
df_dropped.show()



=== Dropping Columns ===
+-------+--------+------+
|   Name|     Job|Salary|
+-------+--------+------+
|  Alice|Engineer| 75000|
|    Bob| Manager| 85000|
|Charlie| Analyst| 65000|
|  Diana|Designer| 70000|
|    Eve|Engineer| 80000|
+-------+--------+------+



In [28]:

# Handle missing values
print("\n=== Handling Missing Values ===")
# Create data with nulls for demonstration
data_with_nulls = [
    ("John", 25, "Engineer", 75000),
    ("Jane", None, "Manager", 85000),
    ("Mike", 35, None, 65000),
    ("Sarah", 28, "Designer", None)
]

df_nulls = spark.createDataFrame(data_with_nulls, columns)
print("Data with nulls:")
df_nulls.show()



=== Handling Missing Values ===
Data with nulls:
+-----+----+--------+------+
| Name| Age|     Job|Salary|
+-----+----+--------+------+
| John|  25|Engineer| 75000|
| Jane|NULL| Manager| 85000|
| Mike|  35|    NULL| 65000|
|Sarah|  28|Designer|  NULL|
+-----+----+--------+------+



In [29]:

# Fill nulls
df_filled = df_nulls.fillna({"Age": 0, "Job": "Unknown", "Salary": 0})
print("After filling nulls:")
df_filled.show()


After filling nulls:
+-----+---+--------+------+
| Name|Age|     Job|Salary|
+-----+---+--------+------+
| John| 25|Engineer| 75000|
| Jane|  0| Manager| 85000|
| Mike| 35| Unknown| 65000|
|Sarah| 28|Designer|     0|
+-----+---+--------+------+



In [30]:

# Drop rows with nulls
df_no_nulls = df_nulls.dropna()
print("After dropping nulls:")
df_no_nulls.show()


After dropping nulls:
+----+---+--------+------+
|Name|Age|     Job|Salary|
+----+---+--------+------+
|John| 25|Engineer| 75000|
+----+---+--------+------+



In [31]:
df.show()

+-------+---+--------+------+
|   Name|Age|     Job|Salary|
+-------+---+--------+------+
|  Alice| 25|Engineer| 75000|
|    Bob| 30| Manager| 85000|
|Charlie| 35| Analyst| 65000|
|  Diana| 28|Designer| 70000|
|    Eve| 32|Engineer| 80000|
+-------+---+--------+------+



In [32]:

# ============================================================================
# SECTION 6: JOINS
# ============================================================================

print("\n=== Join Operations ===")
# Create department data
dept_data = [
    ("Engineer", "Technology"),
    ("Manager", "Administration"),
    ("Analyst", "Business"),
    ("Designer", "Creative")
]

dept_df = spark.createDataFrame(dept_data, ["Job", "Department"])
dept_df.show()



=== Join Operations ===
+--------+--------------+
|     Job|    Department|
+--------+--------------+
|Engineer|    Technology|
| Manager|Administration|
| Analyst|      Business|
|Designer|      Creative|
+--------+--------------+



In [33]:

# Inner join
print("Inner Join:")
joined_df = df.join(dept_df, "Job", "inner")
joined_df.show()

# Left join
print("Left Join:")
left_joined = df.join(dept_df, "Job", "left")
left_joined.show()


Inner Join:
+--------+-------+---+------+--------------+
|     Job|   Name|Age|Salary|    Department|
+--------+-------+---+------+--------------+
| Analyst|Charlie| 35| 65000|      Business|
|Designer|  Diana| 28| 70000|      Creative|
|Engineer|  Alice| 25| 75000|    Technology|
|Engineer|    Eve| 32| 80000|    Technology|
| Manager|    Bob| 30| 85000|Administration|
+--------+-------+---+------+--------------+

Left Join:
+--------+-------+---+------+--------------+
|     Job|   Name|Age|Salary|    Department|
+--------+-------+---+------+--------------+
|Engineer|  Alice| 25| 75000|    Technology|
| Manager|    Bob| 30| 85000|Administration|
|Designer|  Diana| 28| 70000|      Creative|
| Analyst|Charlie| 35| 65000|      Business|
|Engineer|    Eve| 32| 80000|    Technology|
+--------+-------+---+------+--------------+



In [34]:


# ============================================================================
# SECTION 8: SQL QUERIES
# ============================================================================

# print("\n=== SQL Queries ===")
# # Register DataFrame as temporary view
# df.createOrReplaceTempView("employees")

# # Run SQL query
# result = spark.sql("""
#     SELECT Job, AVG(Salary) as avg_salary, COUNT(*) as count
#     FROM employees
#     GROUP BY Job
#     ORDER BY avg_salary DESC
# """)
# result.show()

# ============================================================================
# SECTION 9: READING AND WRITING DATA
# ============================================================================


In [None]:
df.createOrReplaceTempView("employees")
#

In [69]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [70]:
ls /content/drive/MyDrive

 [0m[01;36mCNN[0m@                   [01;34m'Colab Notebooks'[0m/  [01;36m'data sets (1)'[0m@   Untitled0.ipynb
 CNN_ADS_MLClass.ipynb  [01;36m'data sets'[0m@         main_data.csv    [01;34m'Wednesday '[0m/


In [75]:
ls /content/drive/MyDrive/ main_data.csv /pyspark

ls: cannot access 'main_data.csv': No such file or directory
ls: cannot access '/pyspark': No such file or directory
/content/drive/MyDrive/:
 [0m[01;36mCNN[0m@                   [01;34m'Colab Notebooks'[0m/  [01;36m'data sets (1)'[0m@   Untitled0.ipynb
 CNN_ADS_MLClass.ipynb  [01;36m'data sets'[0m@         main_data.csv    [01;34m'Wednesday '[0m/


In [35]:
#/content/sample_data
print("\n=== Writing Data ===")
# Write to CSV (this will create a directory with part files)
df.write.mode("overwrite").option("header", "true").csv("/content/rani")



=== Writing Data ===


In [36]:
df.coalesce(1).write.mode("overwrite").option("header", "true").csv("/content/singleemployees_csv")
#

In [37]:
df.write.partitionBy("Age").mode("overwrite").option("header", "true").csv("/content/columnemployees_csv")

In [38]:

# Write to JSON
df.write.mode("overwrite").json("/content/employees_json")


In [39]:

# Write to Parquet
df.write.mode("overwrite").parquet("/content/employees_parquet")

print("Data written successfully!")


Data written successfully!


In [40]:

# ============================================================================
# SECTION 10: PERFORMANCE OPTIMIZATION
# ============================================================================

print("\n=== Performance Tips ===")

# Cache DataFrame for repeated operations
df_cached = df.cache()
print("DataFrame cached")

# Check partitions
print(f"Number of partitions: {df.rdd.getNumPartitions()}")



=== Performance Tips ===
DataFrame cached
Number of partitions: 2


In [41]:

# Repartition for better performance
df_repartitioned = df.repartition(3)
print(f"After repartitioning: {df_repartitioned.rdd.getNumPartitions()}")


After repartitioning: 3


In [42]:

# ============================================================================
# SECTION 11: WORKING WITH DIFFERENT DATA TYPES
# ============================================================================

print("\n=== Working with Arrays and Maps ===")
from pyspark.sql.functions import array, map_from_arrays, explode



=== Working with Arrays and Maps ===


In [43]:

# Create DataFrame with complex types
complex_data = [
    ("Alice", ["Python", "Spark", "SQL"], {"skill1": "Expert", "skill2": "Intermediate"}),
    ("Bob", ["Java", "Hadoop"], {"skill1": "Advanced", "skill2": "Expert"}),
    ("Charlie", ["R", "Statistics"], {"skill1": "Intermediate", "skill2": "Advanced"})
]

complex_df = spark.createDataFrame(complex_data, ["Name", "Skills", "SkillLevels"])
complex_df.show(truncate=False)


+-------+--------------------+--------------------------------------------+
|Name   |Skills              |SkillLevels                                 |
+-------+--------------------+--------------------------------------------+
|Alice  |[Python, Spark, SQL]|{skill1 -> Expert, skill2 -> Intermediate}  |
|Bob    |[Java, Hadoop]      |{skill1 -> Advanced, skill2 -> Expert}      |
|Charlie|[R, Statistics]     |{skill1 -> Intermediate, skill2 -> Advanced}|
+-------+--------------------+--------------------------------------------+



In [44]:

# Explode array
exploded_df = complex_df.select("Name", explode("Skills").alias("Skill"))
exploded_df.show()


+-------+----------+
|   Name|     Skill|
+-------+----------+
|  Alice|    Python|
|  Alice|     Spark|
|  Alice|       SQL|
|    Bob|      Java|
|    Bob|    Hadoop|
|Charlie|         R|
|Charlie|Statistics|
+-------+----------+



In [45]:

# ============================================================================
# SECTION 12: USER-DEFINED FUNCTIONS (UDF)
# ============================================================================

print("\n=== User Defined Functions ===")
from pyspark.sql.functions import udf

# Define a UDF
def salary_grade(salary):
    if salary > 80000:
        return "A"
    elif salary > 70000:
        return "B"
    else:
        return "C"

# Register UDF
salary_grade_udf = udf(salary_grade, StringType())



=== User Defined Functions ===


In [46]:
salary_grade_udf

In [47]:

# Apply UDF
df_with_grade = df.withColumn("Grade", salary_grade_udf(col("Salary")))
df_with_grade.show()



+-------+---+--------+------+-----+
|   Name|Age|     Job|Salary|Grade|
+-------+---+--------+------+-----+
|  Alice| 25|Engineer| 75000|    B|
|    Bob| 30| Manager| 85000|    A|
|Charlie| 35| Analyst| 65000|    C|
|  Diana| 28|Designer| 70000|    C|
|    Eve| 32|Engineer| 80000|    B|
+-------+---+--------+------+-----+

