<a href="https://colab.research.google.com/github/Happday-bot/Happday-bot/blob/main/PandasCheatSheet.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [3]:
# prompt: display all functions of pandas with their meaning and an example

import pandas as pd

# pandas has far too many functions to list in a single cell, so this is a
# short tour of commonly used DataFrame members, each with a tiny example.

# Sample frame shared by every demo below: 3 rows, 2 integer columns.
data = {'col1': [1, 2, 3], 'col2': [4, 5, 6]}
df = pd.DataFrame(data)

divider = "-" * 20  # printed after each demo to keep the outputs apart

# 1. head(n) - the first n rows (here the first 2)
print("head():", df.head(2), divider, sep="\n")


# 2. tail(n) - the last n rows (here only the last one)
print("tail():", df.tail(1), divider, sep="\n")


# 3. info() - summary of the frame: index range, non-null counts, dtypes.
#    info() writes its report itself, so it is called bare instead of being
#    passed to print().
print("info():")
df.info()
print(divider)


# 4. describe() - descriptive statistics (count, mean, std, quartiles, ...)
#    for the numeric columns
print("describe():", df.describe(), divider, sep="\n")

# 5. shape - (number of rows, number of columns)
print("shape:", df.shape, divider, sep="\n")

# 6. columns - the column labels
print("columns:", df.columns, divider, sep="\n")

# 7. dtypes - the data type of every column
print("dtypes:", df.dtypes, divider, sep="\n")

# 8. loc[] - label-based access; here row label 0, column label 'col1'
print("loc[]:", df.loc[0, 'col1'], divider, sep="\n")

# 9. iloc[] - position-based access; here row position 1, column position 0
print("iloc[]:", df.iloc[1, 0], divider, sep="\n")

# 10. sort_values() - reorder rows by a column; here 'col2', largest first
print("sort_values():", df.sort_values('col2', ascending=False), divider, sep="\n")

# 11. groupby() - group rows by 'col1', then sum the remaining columns
print("groupby():", df.groupby('col1').sum(), divider, sep="\n")

# For everything not covered here, see the pandas API reference:
# https://pandas.pydata.org/pandas-docs/stable/reference/index.html

head():
   col1  col2
0     1     4
1     2     5
--------------------
tail():
   col1  col2
2     3     6
--------------------
info():
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3 entries, 0 to 2
Data columns (total 2 columns):
 #   Column  Non-Null Count  Dtype
---  ------  --------------  -----
 0   col1    3 non-null      int64
 1   col2    3 non-null      int64
dtypes: int64(2)
memory usage: 176.0 bytes
--------------------
describe():
       col1  col2
count   3.0   3.0
mean    2.0   5.0
std     1.0   1.0
min     1.0   4.0
25%     1.5   4.5
50%     2.0   5.0
75%     2.5   5.5
max     3.0   6.0
--------------------
shape:
(3, 2)
--------------------
columns:
Index(['col1', 'col2'], dtype='object')
--------------------
dtypes:
col1    int64
col2    int64
dtype: object
--------------------
loc[]:
1
--------------------
iloc[]:
2
--------------------
sort_values():
   col1  col2
2     3     6
1     2     5
0     1     4
--------------------
groupby():
      col2
col1      
1        4
2        5
3        6
--------------------

In [4]:
# prompt: instead of directing to a different link, print all the functions here itself

import pandas as pd


def _show(title, result):
    """Print one demo: its title, the result, then a 20-dash divider."""
    print(title)
    print(result)
    print("-" * 20)


# Sample frame used by every demo in this cell: 3 rows, 2 integer columns.
data = {'col1': [1, 2, 3], 'col2': [4, 5, 6]}
df = pd.DataFrame(data)

# Function demonstrations

# 1. head() - first two rows
_show("head():", df.head(2))

# 2. tail() - last row
_show("tail():", df.tail(1))

# 3. info() - writes its own report, so it cannot be routed through _show()
print("info():")
df.info()
print("-" * 20)

# 4. describe() - summary statistics per column
_show("describe():", df.describe())

# 5. shape - (rows, columns)
_show("shape:", df.shape)

# 6. columns - column labels
_show("columns:", df.columns)

# 7. dtypes - data type of each column
_show("dtypes:", df.dtypes)

# 8. loc[] - value at row label 0, column label 'col1'
_show("loc[]:", df.loc[0, 'col1'])

# 9. iloc[] - value at row position 1, column position 0
_show("iloc[]:", df.iloc[1, 0])

# 10. sort_values() - rows ordered by 'col2', largest first
_show("sort_values():", df.sort_values('col2', ascending=False))

# 11. groupby() - group on 'col1' and sum the other columns
_show("groupby():", df.groupby('col1').sum())

# 12. to_csv() - writes the frame to 'example.csv' in the working directory;
#     index=False leaves the row labels out of the file. Nothing is returned
#     to display, so a confirmation message is printed instead.
print("to_csv():")
df.to_csv("example.csv", index=False)
print("DataFrame saved to 'example.csv'", "-" * 20, sep="\n")

# 13. apply() - run a function on every element of 'col1' (here: double it)
_show("apply():", df['col1'].apply(lambda value: value * 2))


# 14. mean() - per-column mean
_show("mean():", df.mean())

# 15. max() - per-column maximum
_show("max():", df.max())


# 16. min() - per-column minimum
_show("min():", df.min())

# 17. std() - per-column standard deviation
_show("std():", df.std())

# 18. count() - per-column count of non-null entries
_show("count():", df.count())

# ... (many more functions could be included)


# Note: this is NOT an exhaustive list.
# The official pandas documentation has the complete list and details:
# https://pandas.pydata.org/docs/reference/index.html

head():
   col1  col2
0     1     4
1     2     5
--------------------
tail():
   col1  col2
2     3     6
--------------------
info():
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3 entries, 0 to 2
Data columns (total 2 columns):
 #   Column  Non-Null Count  Dtype
---  ------  --------------  -----
 0   col1    3 non-null      int64
 1   col2    3 non-null      int64
dtypes: int64(2)
memory usage: 176.0 bytes
--------------------
describe():
       col1  col2
count   3.0   3.0
mean    2.0   5.0
std     1.0   1.0
min     1.0   4.0
25%     1.5   4.5
50%     2.0   5.0
75%     2.5   5.5
max     3.0   6.0
--------------------
shape:
(3, 2)
--------------------
columns:
Index(['col1', 'col2'], dtype='object')
--------------------
dtypes:
col1    int64
col2    int64
dtype: object
--------------------
loc[]:
1
--------------------
iloc[]:
2
--------------------
sort_values():
   col1  col2
2     3     6
1     2     5
0     1     4
--------------------
groupby():
      col2
col1      
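1        4
2        5
3        6
--------------------
to_csv():
DataFrame saved to 'example.csv'
--------------------
apply():
0    2
1    4
2    6
Name: col1, dtype: int64
--------------------
mean():
col1    2.0
col2    5.0
dtype: float64
--------------------
max():
col1    3
col2    6
dtype: int64
--------------------
min():
col1    1
col2    4
dtype: int64
--------------------
std():
col1    1.0
col2    1.0
dtype: float64
--------------------
count():
col1    3
col2    3
dtype: int64
--------------------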

In [5]:
# prompt: add more

# This cell continues the tour; it relies on `pd` and `df` created by the
# earlier cells.

divider = "-" * 20  # printed after each demo to keep the outputs apart

# 19. unique() - the distinct values found in 'col1'
print("unique():", df['col1'].unique(), divider, sep="\n")

# 20. value_counts() - how many times each value occurs in 'col1'
print("value_counts():", df['col1'].value_counts(), divider, sep="\n")

# 21. dropna() - discard rows that contain a missing value.
# A separate frame with gaps is needed because `df` has none.
df_nan = pd.DataFrame({'col1': [1, 2, None], 'col2': [4, None, 6]})
print("dropna():", df_nan.dropna(), divider, sep="\n")

# 22. fillna() - substitute a value (here 0) for every missing entry
print("fillna():", df_nan.fillna(0), divider, sep="\n")

# 23. rename() - relabel columns via an {old: new} mapping
print("rename():", df.rename(columns={'col1': 'new_col1'}), divider, sep="\n")

# 24. drop() - remove by label: first the column 'col2', then the row with
#     index 0. Each call returns a new frame, so `df` itself is untouched.
without_col2 = df.drop(columns=['col2'])
without_row0 = df.drop(index=[0])
print("drop():", without_col2, without_row0, divider, sep="\n")


# 25. merge() - join two frames on a shared key column; an inner join keeps
#     only the 'col1' values that appear in both frames
df2 = pd.DataFrame({'col1': [1, 4, 5], 'col3': [7, 8, 9]})
print("merge():", pd.merge(df, df2, on='col1', how='inner'), divider, sep="\n")

# 26. concat() - stack frames on top of each other; ignore_index=True
#     renumbers the rows of the result from 0
df3 = pd.DataFrame({'col1': [7, 8, 9], 'col2': [10, 11, 12]})
print("concat():", pd.concat([df, df3], ignore_index=True), divider, sep="\n")

# 27. isnull() - boolean mask marking the missing entries
print("isnull():", df.isnull(), divider, sep="\n")

unique():
[1 2 3]
--------------------
value_counts():
col1
1    1
2    1
3    1
Name: count, dtype: int64
--------------------
dropna():
   col1  col2
0   1.0   4.0
--------------------
fillna():
   col1  col2
0   1.0   4.0
1   2.0   0.0
2   0.0   6.0
--------------------
rename():
   new_col1  col2
0         1     4
1         2     5
2         3     6
--------------------
drop():
   col1
0     1
1     2
2     3
   col1  col2
1     2     5
2     3     6
--------------------
merge():
   col1  col2  col3
0     1     4     7
--------------------
concat():
   col1  col2
0     1     4
1     2     5
2     3     6
3     7    10
4     8    11
5     9    12
--------------------
isnull():
    col1   col2
0  False  False
1  False  False
2  False  False
--------------------


In [6]:
# prompt: more

import pandas as pd

# Continues from the cells above and reuses the `df` they created.

# 28. notnull() - boolean mask that is True wherever a value is present
present_mask = df.notnull()
print("notnull():")
print(present_mask)
print("-" * 20)

# 29. duplicated() - flags every row that repeats an earlier row.
# A frame with a repeated row is built for this demo and the next one.
df_dup = pd.DataFrame({'col1': [1, 2, 2, 3], 'col2': [4, 5, 5, 6]})
repeat_flags = df_dup.duplicated()
print("duplicated():")
print(repeat_flags)
print("-" * 20)

# 30. drop_duplicates() - the same frame with the repeated rows removed
deduplicated = df_dup.drop_duplicates()
print("drop_duplicates():")
print(deduplicated)
print("-" * 20)

notnull():
   col1  col2
0  True  True
1  True  True
2  True  True
--------------------
duplicated():
0    False
1    False
2     True
3    False
dtype: bool
--------------------
drop_duplicates():
   col1  col2
0     1     4
1     2     5
3     3     6
--------------------


In [7]:
# prompt: more, at least another 50

# This cell continues the tour and relies on `pd` and `df` from the earlier
# cells. Unlike the previous cells it modifies `df` IN PLACE (insert/pop):
# after it has run, `df` carries an extra leading column 'first_column'.
# The in-place steps below are written so the cell can be re-run safely.

# 31. corr() - pairwise correlation between the columns
print("corr():")
print(df.corr())
print("-" * 20)

# 32. cov() - pairwise covariance between the columns
print("cov():")
print(df.cov())
print("-" * 20)

# 33. nunique() - number of distinct values in 'col1'
print("nunique():")
print(df['col1'].nunique())
print("-" * 20)

# 34. sample() - random sample of 1 row; the row chosen can differ per run
print("sample():")
print(df.sample(n=1))
print("-" * 20)

# 35. replace() - replace all occurrences of 1 with 10
print("replace():")
print(df.replace(1, 10))
print("-" * 20)


# 36. astype() - convert the data type of 'col1' to float
print("astype():")
print(df['col1'].astype(float))
print("-"*20)


# 37. to_numpy() - convert the DataFrame to a NumPy array
print("to_numpy():")
print(df.to_numpy())
print("-"*20)

# 38. memory_usage() - memory used by the index and by each column
print("memory_usage():")
print(df.memory_usage())
print("-" * 20)


# 39. set_index() - use 'col1' as the index (returns a new frame)
print("set_index():")
print(df.set_index('col1'))
print("-" * 20)

# 40. reset_index() - turn the index back into an ordinary column
print("reset_index():")
df_indexed = df.set_index('col1')
print(df_indexed.reset_index())
print("-" * 20)


# 41. add_prefix() - add a prefix to every column name
print("add_prefix():")
print(df.add_prefix('prefix_'))
print("-" * 20)


# 42. add_suffix() - add a suffix to every column name
print("add_suffix():")
print(df.add_suffix('_suffix'))
print("-"*20)


# 43. select_dtypes() - keep only the columns with numeric data types
print("select_dtypes():")
print(df.select_dtypes(include='number'))
print("-"*20)

# 44. filter() - keep only the columns whose name contains "col"
print("filter():")
print(df.filter(like='col'))
print("-"*20)


# 45. insert() - add a column at a given position, modifying df in place.
# The position is computed rather than hard-coded (it was 2) so the column
# is always appended last, even when the cell is re-run and df already
# holds 'first_column'.
print("insert():")
df.insert(loc=len(df.columns), column="new_column", value=[7,8,9])
print(df)
print("-"*20)



# 46. pop() - remove a column from df in place and return it as a Series
print("pop():")
new_column = df.pop("new_column")
print(df)
print(new_column)
print("-"*20)



# 47. insert() again - this time at position 0, i.e. as the first column.
# insert() raises ValueError when the column already exists, which is what
# happened on a second run of this cell; skip the insert in that case.
print("insert:")
if "first_column" not in df.columns:
    df.insert(0, "first_column", [10,11,12])
print(df)
print("-"*20)

# 48. explode() - expand each element of a list-valued cell into its own row
print("explode():")
df_exploded = pd.DataFrame({'col1':[1,2], 'col2': [[3,4],[5]]})
print(df_exploded.explode('col2'))
print("-"*20)

# 49. melt() - reshape wide to long: 'col1' stays as the identifier and
#     'col2' is unpivoted into variable/value rows
print("melt():")
print(pd.melt(df, id_vars=['col1'], value_vars=['col2']))
print("-"*20)


# 50. pivot() - reshape long to wide: 'foo' values become the index,
#     'bar' values become the columns, and 'baz' fills the cells
print("pivot():")
df_pivot = pd.DataFrame({'foo': ['one', 'one', 'one', 'two', 'two',
   'two'],
   'bar': ['A', 'B', 'C', 'A', 'B', 'C'],
   'baz': [1, 2, 3, 4, 5, 6]})
print(df_pivot.pivot(index='foo', columns='bar', values='baz'))
print("-"*20)

corr():
      col1  col2
col1   1.0   1.0
col2   1.0   1.0
--------------------
cov():
      col1  col2
col1   1.0   1.0
col2   1.0   1.0
--------------------
nunique():
3
--------------------
sample():
   col1  col2
0     1     4
--------------------
replace():
   col1  col2
0    10     4
1     2     5
2     3     6
--------------------
astype():
0    1.0
1    2.0
2    3.0
Name: col1, dtype: float64
--------------------
to_numpy():
[[1 4]
 [2 5]
 [3 6]]
--------------------
memory_usage():
Index    128
col1      24
col2      24
dtype: int64
--------------------
set_index():
      col2
col1      
1        4
2        5
3        6
--------------------
reset_index():
   col1  col2
0     1     4
1     2     5
2     3     6
--------------------
add_prefix():
   prefix_col1  prefix_col2
0            1            4
1            2            5
2            3            6
--------------------
add_suffix():
   col1_suffix  col2_suffix
0            1            4
1            2            5
2    