### Các xử lý cơ bản hữu dụng
#### 1. Bảng

In [1]:
import pandas as pd


# Product and credit card information
df_product_data = pd.DataFrame({
    'Customer_ID': [11, 12, 12, 13, 14, 15],
    'Product_Name': ['CC', 'CC', 'TD', 'CC', 'CC','TD'],
    'Card_Number': ['1234', '3457', '3458', '4589', '7890', '8901']
})

df_cc_info = pd.DataFrame({
    'Customer_ID_2': [11, 12, 13, 14],
    'Limit': ['20000', '25000', '50000', '25000'],
    'SaleAgent': ['34', '57', '00', '00'],
    'SaleChannel': ['Offline', 'Offline', 'Onl', 'Onl']
})

df_cc_payment = pd.DataFrame({
    'Customer_ID': [11, 12, 13, 14],
    'Payment_volume': ['10000', '15000', '5000', '2000'],
    'Payment_day': ['2024-01-05', '2024-01-10', '2024-01-15', '2024-01-20']
})
df_cc_payment

Unnamed: 0,Customer_ID,Payment_volume,Payment_day
0,11,10000,2024-01-05
1,12,15000,2024-01-10
2,13,5000,2024-01-15
3,14,2000,2024-01-20


##### 1. Lấy một số dòng theo điều kiện trước

In [3]:
import pandas as pd

# Customer demographics
df_customerdemogr = pd.DataFrame({
    'Customer_ID': [11, 12, 13, 14, 15],
    'Name': ['John', 'Emma', 'Michael', 'Sophia', 'William'],
    'Age': [25, 32, 45, 28, 50],
    'Gender': ['Male', 'Female', 'Female', 'Male', 'Male'],
    'City': ['New York', 'Los Angeles', 'Chicago', 'Chicago', 'Houston'],
    'Education': ['High School', 'Bachelor', 'Master', 'Bachelor', 'PhD'],
    'Income': [50000, 60000, 90000, 60000, 80000]
})
df_customerdemogr

Unnamed: 0,Customer_ID,Name,Age,Gender,City,Education,Income
0,11,John,25,Male,New York,High School,50000
1,12,Emma,32,Female,Los Angeles,Bachelor,60000
2,13,Michael,45,Female,Chicago,Master,90000
3,14,Sophia,28,Male,Chicago,Bachelor,60000
4,15,William,50,Male,Houston,PhD,80000


In [4]:
Name_list = ['Emma', 'Michael']
df_customerdemogr_11 = df_customerdemogr[df_customerdemogr['Name'] == 'Emma']
df_customerdemogr_12 = df_customerdemogr[df_customerdemogr['Name'].isin(Name_list)]
df_customerdemogr_12

Unnamed: 0,Customer_ID,Name,Age,Gender,City,Education,Income
1,12,Emma,32,Female,Los Angeles,Bachelor,60000
2,13,Michael,45,Female,Chicago,Master,90000


##### 2. Lấy một số trường trong bảng

In [3]:
df_customerdemogr_2 = df_customerdemogr[['Customer_ID','City']]
df_customerdemogr_2

Unnamed: 0,Customer_ID,City
0,11,New York
1,12,Los Angeles
2,13,Chicago
3,14,Chicago
4,15,Houston


##### 3. Nối hai bảng trái phải theo Customer_ID

In [4]:
df_demo_product = pd.merge(df_customerdemogr, df_product_data, how='right', on='Customer_ID')
df_demo_product

Unnamed: 0,Customer_ID,Name,Age,Gender,City,Education,Income,Product_Name,Card_Number
0,11,John,25,Male,New York,High School,50000,CC,1234
1,12,Emma,32,Female,Los Angeles,Bachelor,60000,CC,3457
2,12,Emma,32,Female,Los Angeles,Bachelor,60000,TD,3458
3,13,Michael,45,Female,Chicago,Master,90000,CC,4589
4,14,Sophia,28,Male,Chicago,Bachelor,60000,CC,7890
5,15,William,50,Male,Houston,PhD,80000,TD,8901


Nối hai bảng với tên trường khác nhau

In [5]:
df_demo_product = pd.merge(df_customerdemogr, df_cc_info, on=['Customer_ID', 'Customer_ID_2'], how='left')
df_demo_product

KeyError: 'Customer_ID'

##### 4. Nối hai bảng trên dưới - Chèn thêm dòng cho bảng

In [6]:
row_2 = {'Customer_ID': '20','City': 'HCMC'}
# pd.DataFrame(row,index=[0])

df_customerdemogr_3 = pd.concat([df_customerdemogr_2,pd.DataFrame(row_2,index=[0])], ignore_index=True) 
df_customerdemogr_3 

Unnamed: 0,Customer_ID,City
0,11,New York
1,12,Los Angeles
2,13,Chicago
3,14,Chicago
4,15,Houston
5,20,HCMC


##### 5. Pivot - đổi chiều colume và dòng
Hãy pivot chiều dữ liệu với row là các City và Colume_1 là số lượng khách hàng của City đó, Colume_2 là Average thu nhập từ các khách hàng đến từ City đó

In [7]:
pivot_table = df_customerdemogr.pivot_table(index='City', values=['Customer_ID', 'Income']
                                            , aggfunc={'Customer_ID': 'count', 'Income': 'mean'})
pivot_table.columns = ['Number of Customers', 'Average Income']
pivot_table.reset_index(inplace=True)

pivot_table

Unnamed: 0,City,Number of Customers,Average Income
0,Chicago,2,75000.0
1,Houston,1,80000.0
2,Los Angeles,1,60000.0
3,New York,1,50000.0


##### 6. Advanced - Tạo mảng trắng

In [8]:
# Merge DataFrames
df_merge = pd.merge(df_customerdemogr, df_product_data, on='Customer_ID', how='outer')
df_merge = pd.merge(df_merge, df_cc_payment, on='Customer_ID', how='outer')

df_merge

Unnamed: 0,Customer_ID,Name,Age,Gender,City,Education,Income,Product_Name,Card_Number,Payment_volume,Payment_day
0,11,John,25,Male,New York,High School,50000,CC,1234,10000.0,2024-01-05
1,12,Emma,32,Female,Los Angeles,Bachelor,60000,CC,3457,15000.0,2024-01-10
2,12,Emma,32,Female,Los Angeles,Bachelor,60000,TD,3458,15000.0,2024-01-10
3,13,Michael,45,Female,Chicago,Master,90000,CC,4589,5000.0,2024-01-15
4,14,Sophia,28,Male,Chicago,Bachelor,60000,CC,7890,2000.0,2024-01-20
5,15,William,50,Male,Houston,PhD,80000,TD,8901,,


In [9]:
# Generate Payment_day values from current month to December 2024
current_month = pd.to_datetime('today').month
current_year = pd.to_datetime('today').year

payment_days = pd.date_range(start=pd.Timestamp(year=current_year, month=current_month, day=1), end=pd.Timestamp(year=2024, month=12, day=31), freq='M')
payment_days = payment_days.strftime('%Y-%m-%d')

# Create mapping table
df_mapping = df_merge[['City', 'Product_Name', 'Payment_day']].drop_duplicates().reset_index(drop=True)

# Expand mapping table with Payment_day values
df_mapping_expanded = pd.DataFrame(columns=['City', 'Product_Name', 'Payment_day'])
for _, row in df_mapping.iterrows():
    city = row['City']
    product = row['Product_Name']
    for payment_day in payment_days:
        df_mapping_expanded = pd.concat([df_mapping_expanded,
                                         pd.DataFrame({'City': [city], 'Product_Name': [product], 'Payment_day': [payment_day]})],
                                         ignore_index=True)

df_mapping_expanded

  payment_days = pd.date_range(start=pd.Timestamp(year=current_year, month=current_month, day=1), end=pd.Timestamp(year=2024, month=12, day=31), freq='M')


Unnamed: 0,City,Product_Name,Payment_day
0,New York,CC,2024-07-31
1,New York,CC,2024-08-31
2,New York,CC,2024-09-30
3,New York,CC,2024-10-31
4,New York,CC,2024-11-30
5,New York,CC,2024-12-31
6,Los Angeles,CC,2024-07-31
7,Los Angeles,CC,2024-08-31
8,Los Angeles,CC,2024-09-30
9,Los Angeles,CC,2024-10-31


#### 7. List comprehension

For a given list print all elements with even indices (L[0], L[2], ...)

Input format: 
- A list of elements, divided with space

Output format:
- Elements of given list with even indices


Input	
20 9 12 11 18 18 9 7 blowze
Result
['20', '12', '18', '9', 'blowze']


In [9]:
def slide(string_1):
    input_list = string_1.split()
    result = [item for i, item in enumerate(input_list) if i % 2 == 0]
    return result

string_1 = "20 9 12 11 18 18 9 7 blowze"
print(string_1.split())
print(slide(string_1))

['20', '9', '12', '11', '18', '18', '9', '7', 'blowze']
['20', '12', '18', '9', 'blowze']


Text

Given text, find the longest word in it. If there are multiple longest words, output the first one.

Input format
- Enter text of any length, words are separated by spaces.

Output format
- Find and output the longest word in the text.

Input_1 = "What is your name"
Input_2 = 'Big big Ban'
Input_3 = 'Need more brains'

Result_1 = 'What'
Result_2 = 'Big'
Result_3 = 'brains'


Task
Find the number of positive elements in the given list.

Input format
A list of integers is entered. The number of integers does not exceed 100. All numbers in the list are on the same line. The absolute value of the numbers does not exceed 100000.

Output format
Output a single number — the number of positive elements.

Input	Result
1 1 -2 3 -4 5
4
-1 -2 -3 -4 -5
0


Task
You are given a list a containing integers. Your task is to rewrite its elements into another list of the same size as follows: all negative elements should come first, followed by all the rest.

Input format
List of integers separated by spaces.

Output format
New list created according to the task condition.

Input	Result
1 -3 3 7
[-3, 1, 3, 7]
-4 -20 -412 -3 -2
[-4, -20, -412, -3, -2]


Task
Given a list a, find the minimum element and insert it at the beginning of the list, keeping the order of other elements. If there are multiple minimum elements, move the first found minimum element of the array to the beginning.

Input
Elements of the list separated by spaces in a single line.

Output
The modified list.

Input	Result
4 5 3 1 2
[1, 4, 5, 3, 2]
5 4 3 2 1 1
[1, 5, 4, 3, 2, 1]


Task
Given three lists of integers, print all the common elements in these lists in ascending order. Each element should be printed on a new line.

Input Format
Three lines containing lists of integers.

Output Format
Print the common elements of these three arrays in ascending order on separate lines.

Input	Result
1 2 3 7 8
9 2 5 3
7 3 2
2
3


Task
Neil Armstrong became the first person to land on the moon. One of Armstrong's tasks was to collect lunar soil (a year later, one of the samples of this soil would be given to the USSR Academy of Sciences).

It is known that Armstrong needs to find a piece of lunar rock weighing X. Given a list of real numbers containing the masses of all samples of lunar soil, determine the index of the array element that is closest in value to X. If there are multiple elements in the array that are equally close, output the index of the first one.

Input format
A list of real numbers - the mass of each sample A real number X - the mass of the desired piece of lunar soil (0 < X ≤ 1000)

Output format
A positive integer - the index of the sample of lunar soil that is closest in mass to X

Input	Result
808.31 70.5 59.58 22.2 2.8 269.1 0.4 5.9 0.4 7.9 
3.71

Task
Given a list of whole numbers, determine how many elements have their value equal to their position in the list (in case element numbering starts from one).

Input Format
A list of integers

Output Format
An integer representing the number of elements whose position in the list matches their value.

Note
Position = Index + 1

Input	Result
1 2 1 
2

Task
🤯 This is the hard task. You haven't learned the necessary knowledge to solve it, but you can research! 

We've always entered a list into a string using map().

But it could have been done without using map() ...

Given a list of integers, they are entered into a string separated by spaces. Come up with a way to read such a list without using map() and list().

Print the obtained list using print(list).

Input format
A list of integers entered into a string separated by spaces.

Output format
Print the obtained list using print(list).

Input	Result
8 33 19 21 1
[8, 33, 19, 21, 1]
