In [1]:
import pandas as pd
import psycopg2
from sqlalchemy import create_engine 
from sqlalchemy import create_engine, text

PostgreSQL_HOST = '127.0.0.1'
PostgreSQL_PORT = '5432'
PostgreSQL_USER = 'postgres'
PostgreSQL_PASSWORD = '123456'
PostgreSQL_DB = 'sql_advanced'

engine = create_engine('postgresql+psycopg2://%s:%s@%s:%s/%s'
                           % (PostgreSQL_USER, PostgreSQL_PASSWORD, PostgreSQL_HOST, PostgreSQL_PORT, PostgreSQL_DB))

In [2]:
# 用于获取可重排列的 SQL 语句
query_1 = """
SELECT P1.name AS name_1, P2.name AS name_2
 FROM Products P1, Products P2;
"""

cross_df = pd.read_sql_query(query_1, engine) 
cross_df

Unnamed: 0,name_1,name_2
0,苹果,苹果
1,苹果,橘子
2,苹果,香蕉
3,橘子,苹果
4,橘子,橘子
5,橘子,香蕉
6,香蕉,苹果
7,香蕉,橘子
8,香蕉,香蕉


In [3]:
query  = """select * from Products"""
data = pd.read_sql_query(query, engine) 

data

df1 = data.copy()  
df2 = data.copy() 
result = pd.merge(df1, df2, on='name', suffixes=('_1', '_2'))  
result

Unnamed: 0,name,price_1,price_2
0,苹果,50,50
1,橘子,100,100
2,香蕉,80,80


In [4]:
query  = """select * from Products"""
Products = pd.read_sql_query(query, engine) 

# 将name列转换为一个DataFrame
name_df = Products["name"].to_frame()

# 创建一个公共的键列，值都为0
name_df["key"] = 0

# 用merge函数进行交叉连接，on参数指定公共的键列，how参数指定连接方式为outer
cross_join = name_df.merge(name_df, on="key", how="outer").rename(columns={"name_x": "name_1", "name_y": "name_2"})

# 删除结果表中的键列
cross_join = cross_join[['name_1','name_2']]

# 查看结果表
cross_join

Unnamed: 0,name_1,name_2
0,苹果,苹果
1,苹果,橘子
2,苹果,香蕉
3,橘子,苹果
4,橘子,橘子
5,橘子,香蕉
6,香蕉,苹果
7,香蕉,橘子
8,香蕉,香蕉


In [5]:
query  = """select * from Products"""
Products = pd.read_sql_query(query, engine) 

# 将name列转换为一个DataFrame
name_df = Products["name"].to_frame()

# 创建一个公共的键列，值都为0
name_df["key"] = 0

# 用merge函数进行交叉连接，on参数指定公共的键列，how参数指定连接方式为outer
cross_join = name_df.merge(name_df, on="key", how="outer").rename(columns={"name_x": "name_1", "name_y": "name_2"})

# 用query函数筛选出结果表中name_1和name_2不相等的行
# inner_join = cross_join.query("name_1 != name_2")

cross_join.drop_duplicates(inplace=True)

# 查看结果表
cross_join


Unnamed: 0,name_1,key,name_2
0,苹果,0,苹果
1,苹果,0,橘子
2,苹果,0,香蕉
3,橘子,0,苹果
4,橘子,0,橘子
5,橘子,0,香蕉
6,香蕉,0,苹果
7,香蕉,0,橘子
8,香蕉,0,香蕉


In [6]:
import pandas as pd
df = pd.DataFrame({
    'brand': ['apple', 'apple', 'strawberry', 'strawberry', 'strawberry'],
    'style': ['small', 'small', 'small', 'large', 'middle'],
    'price': [4, 4, 5, 10, 6]
})
df


Unnamed: 0,brand,style,price
0,apple,small,4
1,apple,small,4
2,strawberry,small,5
3,strawberry,large,10
4,strawberry,middle,6


In [7]:
df_1 = df.drop_duplicates()
df_1

Unnamed: 0,brand,style,price
0,apple,small,4
2,strawberry,small,5
3,strawberry,large,10
4,strawberry,middle,6


In [8]:
df

Unnamed: 0,brand,style,price
0,apple,small,4
1,apple,small,4
2,strawberry,small,5
3,strawberry,large,10
4,strawberry,middle,6


In [9]:
df_2 = df.drop_duplicates(keep=False)
df_2

Unnamed: 0,brand,style,price
2,strawberry,small,5
3,strawberry,large,10
4,strawberry,middle,6


In [10]:
df_3 = df.drop_duplicates(keep='first')
df_3

Unnamed: 0,brand,style,price
0,apple,small,4
2,strawberry,small,5
3,strawberry,large,10
4,strawberry,middle,6


In [11]:
query  = """
SELECT name, price,
     RANK() OVER (ORDER BY price DESC) AS rank_1,
     DENSE_RANK() OVER (ORDER BY price DESC) AS rank_2
FROM Products;"""

order_res = pd.read_sql_query(query, engine) 
order_res

Unnamed: 0,name,price,rank_1,rank_2
0,橘子,100,1,1
1,西瓜,80,2,2
2,苹果,50,3,3
3,葡萄,50,3,3
4,香蕉,50,3,3
5,柠檬,30,6,4


In [12]:
query = """
SELECT P1.name,
P1.price,
(SELECT COUNT(P2.price)
FROM Products P2
WHERE P2.price > P1.price) + 1 AS rank_1
FROM Products P1
ORDER BY rank_1;
"""
order_1 = pd.read_sql_query(query, engine) 
order_1

Unnamed: 0,name,price,rank_1
0,橘子,100,1
1,西瓜,80,2
2,苹果,50,3
3,葡萄,50,3
4,香蕉,50,3
5,柠檬,30,6


In [13]:
query = """
SELECT P1.name,
P1.price,
(SELECT COUNT(distinct P2.price)
FROM Products P2
WHERE P2.price > P1.price) + 1 AS rank_1
FROM Products P1
ORDER BY rank_1;
"""
order_2 = pd.read_sql_query(query, engine) 
order_2

Unnamed: 0,name,price,rank_1
0,橘子,100,1
1,西瓜,80,2
2,苹果,50,3
3,葡萄,50,3
4,香蕉,50,3
5,柠檬,30,4


In [14]:
query = """

SELECT P1.name,
 MAX(P1.price) AS price, 
 COUNT(P2.name) +1 AS rank_1
 FROM Products P1 INNER JOIN Products P2
 ON P1.price < P2.price
 GROUP BY P1.name
 ORDER BY rank_1;

"""
order_3 = pd.read_sql_query(query, engine) 
order_3

Unnamed: 0,name,price,rank_1
0,西瓜,80,2
1,苹果,50,3
2,葡萄,50,3
3,香蕉,50,3
4,柠檬,30,6


In [23]:
query = """

SELECT P1.name,
 MAX(P1.price) AS price, 
 COUNT(P2.name) +1 AS rank_1
 FROM Products P1 left join Products P2
 ON P1.price < P2.price
 GROUP BY P1.name
 ORDER BY rank_1;

"""
order_4 = pd.read_sql_query(query, engine) 
order_4

Unnamed: 0,name,price,rank_1
0,橘子,100,1
1,西瓜,80,2
2,苹果,50,3
3,葡萄,50,3
4,香蕉,50,3
5,柠檬,30,6


In [27]:
import pandas as pd  
  
# 读取数据  
data = {  
    'name': ['苹果', '橘子', '葡萄', '西瓜', '柠檬', '香蕉'],  
    'price': [50, 100, 50, 80, 30, 50]  
}  
df = pd.DataFrame(data)  
  
# 使用 rank() 函数进行排序，参数默认min,也可以设置为 first或 dense 

df['rk'] = df['price'].rank(method='min')  
# df['rk'] = df['price'].rank(method='dense')   

df_sorted = df.sort_values('rk')  

df_sorted

Unnamed: 0,name,price,rank
4,柠檬,30,1.0
0,苹果,50,2.0
2,葡萄,50,2.0
5,香蕉,50,2.0
3,西瓜,80,5.0
1,橘子,100,6.0


In [36]:
import pandas as pd  
  
# 读取数据  
data = {  
    'name': ['苹果', '橘子', '葡萄', '西瓜', '柠檬', '香蕉'],  
    'price': [50, 100, 50, 80, 30, 50]  
}  
df = pd.DataFrame(data)  
  
# 对价格列进行排序  
df_sorted = df.sort_values('price',ascending=False)  
  
# 重置索引，以便与原始数据框的索引对应  
df_sorted = df_sorted.reset_index(drop=True)  


# 添加排名列，使用 rank() 函数的替代方法  
# df_sorted['rk'] = df_sorted.groupby('price').cumcount() + 1  
  
df_sorted

Unnamed: 0,name,price
0,橘子,100
1,西瓜,80
2,苹果,50
3,葡萄,50
4,香蕉,50
5,柠檬,30


In [58]:
import pandas as pd

# 创建一个数据框
data = {'产品': ['苹果', '橘子', '葡萄', '西瓜', '柠檬', '香蕉'],
        '数值': [50, 100, 50, 80, 30, 50]}
df = pd.DataFrame(data)

# 对数值进行排序
df = df.sort_values('数值')

# 创建一个新的数据框，然后使用reset_index函数实现RANK()函数的功能
df_rank = df['数值'].drop_duplicates().reset_index(drop=True).reset_index()
df_rank.columns = ['排名', '数值']

# 将原始数据框和新的数据框进行合并
df = pd.merge(df, df_rank, on='数值', how='left')

print(df)


   产品   数值  排名
0  柠檬   30   0
1  苹果   50   1
2  葡萄   50   1
3  香蕉   50   1
4  西瓜   80   2
5  橘子  100   3


In [64]:
import pandas as pd

# 假设你的数据框是df，包含'产品'和'数值'两列
df = pd.DataFrame({
    'name': ['苹果', '橘子', '葡萄', '西瓜', '柠檬', '香蕉'],
    'price': [50, 100, 50, 80, 30, 50]
})

# 使用apply函数计算每个水果的价格在所有产品价格中的排名
df['rank'] = df['price'].apply(lambda x: (df['price'] > x).sum() + 1)
# df['dense_rank'] = df['price'].apply(lambda x: (df['price'].unique() > x).sum() + 1)

# 按照排名排序
df = df.sort_values('rank')
# df = df.sort_values('dense_rank')

df


Unnamed: 0,name,price,rank
1,橘子,100,1
3,西瓜,80,2
0,苹果,50,3
2,葡萄,50,3
5,香蕉,50,3
4,柠檬,30,6
