In [1]:
## 1. Create and Inspect a DataFrame
## Objective: Practice creating a DataFrame and inspecting its contents.
## Description:
## Create a Pandas DataFrame using a dictionary with the following data:

## Names: Alice, Bob, Charlie
## Ages: 25, 30, 35
## Cities: New York, Paris, London
## Display the first few rows of the DataFrame and print its basic info (data types and shape).

In [2]:
import pandas as pd

data = {'Names': ['Alice', 'Bob', 'Charlie'],
        'Ages': [25, 30, 35],
        'Cities': ['New York', 'Paris', 'London']
        }
df = pd.DataFrame(data, columns =[ 'Names', 'Ages', 'Cities'])
df.info()
df.shape
df.columns

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3 entries, 0 to 2
Data columns (total 3 columns):
 #   Column  Non-Null Count  Dtype 
---  ------  --------------  ----- 
 0   Names   3 non-null      object
 1   Ages    3 non-null      int64 
 2   Cities  3 non-null      object
dtypes: int64(1), object(2)
memory usage: 204.0+ bytes


Index(['Names', 'Ages', 'Cities'], dtype='object')

In [3]:
## 2. Select and Filter Data
## Objective: Learn to select columns and filter rows based on conditions.
## Description:
## Given a DataFrame containing employee data with columns name, department, and salary, write code to:

## Select only the name and salary columns.
## Filter and display the rows where salary is greater than 50000.

In [4]:
import pandas as pd

data = {'name': ['David', 'Serena', 'Francesco'],
        'department': ['HR', 'Finance', 'IT'],
        'salary': [4800, 5000, 7000]
        }
df = pd.DataFrame(data, columns = ['name', 'department', 'salary'])
df

df[['name', 'salary']]
# Create a boolean mask for salaries greater than 5000
mask = df['salary'] > 5000  # Inner: creates a Boolean Series (True/False for each row)
print(mask)
# Use the mask to filter the DataFrame
filtered_df = df[mask]      # Outer: selects only rows where mask is True
print(filtered_df)


## or all in on line
mask = df[df['salary'] > 5000]




0    False
1    False
2     True
Name: salary, dtype: bool
        name department  salary
2  Francesco         IT    7000


In [5]:
## 3. Add, Update, and Remove Columns
## Objective: Manipulate DataFrame columns.

## Description:
## Starting with a DataFrame containing columns product and price, do the following:

## Add a new column called discounted_price that is 90% of price.
## Update the product column to uppercase for all rows.
## Remove the original price column from the DataFrame.

In [6]:
import pandas as pd

dataframe = {'product': ['apple', 'banana', 'carrot'], 'price': [1.20, 0.80, 0.50]}
print(dataframe)

df = pd.DataFrame(dataframe)
print(df)

df['discounted_price'] = df['price'] * 0.90
print(df)
df['product'] = df['product'].str.upper()
print(df)
df = df.drop('price', axis=1)
print(df)


{'product': ['apple', 'banana', 'carrot'], 'price': [1.2, 0.8, 0.5]}
  product  price
0   apple    1.2
1  banana    0.8
2  carrot    0.5
  product  price  discounted_price
0   apple    1.2              1.08
1  banana    0.8              0.72
2  carrot    0.5              0.45
  product  price  discounted_price
0   APPLE    1.2              1.08
1  BANANA    0.8              0.72
2  CARROT    0.5              0.45
  product  discounted_price
0   APPLE              1.08
1  BANANA              0.72
2  CARROT              0.45


In [7]:
## 4. Group and Aggregate  ???

## Objective: Summarize data using groupby and aggregation.
## Description:
## Given a DataFrame with columns category and sales, group the data by category and calculate the total and average sales for each category.
## Display the resulting summary DataFrame.

In [None]:
import pandas as pd

df = {'category':['A', 'B', 'A', 'B', 'C'], 'sales':[100, 200, 150, 120, 300]}
df = pd.DataFrame(df, columns = ['category', 'sales'])
print(df)
#df.info()

summary = df.groupby('category')['sales'].agg(['sum', 'mean'])
print(summary)

  category  sales
0        A    100
1        B    200
2        A    150
3        B    120
4        C    300
<pandas.core.groupby.generic.DataFrameGroupBy object at 0x12ac3c9d0>


In [9]:
## 5. Read and Write CSV Files
## Objective: Practice file I/O with Pandas.
## Description:
## Write code to:

## Read a CSV file named data.csv into a DataFrame.
## Display the first 2 rows of the data.
## Save only the columns name and score into a new CSV file called filtered_data.csv.

In [10]:
import pandas as pd
import os

## Crate a dataframe as dictionary
df = {'name':['Eve', 'Frank', 'Grace'], 'score':[88, 92, 95], 'extra':['foo', 'bar', 'baz']}

df = pd.DataFrame(df, columns=['name', 'score', 'extra'])

print(df)
print(os.getcwd())

## Saving df as my_dataframe.csv 
df.to_csv('my_dataframe.csv', index=False)

## Reading my_dataframe and save it as new_data
new_data = pd.read_csv('my_dataframe.csv', index_col=False)


## Display only the first 2 rows
new_data = (new_data[0:2])
print(new_data)
## Selecting only columns name and score
filtered_data = new_data[['name', 'score']]

## Saving the new csv filtered data 
print(filtered_data)
filtered_data.to_csv('filtered_data.csv')


    name  score extra
0    Eve     88   foo
1  Frank     92   bar
2  Grace     95   baz
/Users/giorgiocavallo/Desktop/Python_+_R_interview/Python_pandas/Pandas
    name  score extra
0    Eve     88   foo
1  Frank     92   bar
    name  score
0    Eve     88
1  Frank     92
