<a href="https://colab.research.google.com/github/Junseokee/Study-Python/blob/main/100_pandas_puzzle_1_ipynb%EC%9D%98_%EC%82%AC%EB%B3%B8.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# 10 minutes to pandas
#### This is a short introduction to pandas, geared mainly for new users. You can see more complex recipes in the Cookbook.

#### Customarily, we import as follows:

In [None]:
import numpy as np
import pandas as pd

# Object creation
#### See the Intro to data structures section.

#### Creating a Series by passing a list of values, letting pandas create a default integer index:

In [None]:
# Build a Series from a plain Python list; pandas supplies the default integer index.
# The np.nan entry marks a missing value.
s = pd.Series(data=[1, 3, 5, np.nan, 6, 8])
print(s)

0    1.0
1    3.0
2    5.0
3    NaN
4    6.0
5    8.0
dtype: float64


In [None]:
# Creating a DataFrame by passing a NumPy array, with a datetime index and labeled columns.
# First build the index: six consecutive dates starting at 2013-01-01.
dates = pd.date_range(start="20130101", periods=6)
dates

DatetimeIndex(['2013-01-01', '2013-01-02', '2013-01-03', '2013-01-04',
               '2013-01-05', '2013-01-06'],
              dtype='datetime64[ns]', freq='D')

In [None]:
# Build a 6x4 DataFrame of standard-normal draws, indexed by `dates`, with columns A-D.
# A locally seeded Generator makes the values identical on every Restart & Run All;
# the previous unseeded np.random.randn call produced different numbers on each run.
# NOTE: the outputs saved in this notebook came from an unseeded run and will be
# replaced the next time the notebook is executed.
df = pd.DataFrame(
    np.random.default_rng(42).standard_normal((6, 4)),
    index=dates,
    columns=list("ABCD"),
)
df

Unnamed: 0,A,B,C,D
2013-01-01,-1.249748,0.697185,1.5536,0.859551
2013-01-02,1.756773,0.74499,0.917294,-0.804845
2013-01-03,0.306445,0.319013,0.762304,0.003968
2013-01-04,-0.750983,-2.596451,1.257411,-0.420489
2013-01-05,1.083972,-0.809843,1.861297,-0.96316
2013-01-06,-0.989817,-2.481632,-1.158652,-1.494048


In [None]:
# Creating a DataFrame from a dict whose values can each be converted into a column.
# Scalar entries ("A", "B", "F") are repeated for every row; the array-like entries
# ("C", "D", "E") each supply four values.
df2 = pd.DataFrame(
    {
        "A": 1.0,
        "B": pd.Timestamp("20130102"),
        "C": pd.Series(1, index=list(range(4)), dtype="float32"),
        "D": np.full(4, 3, dtype="int32"),
        "E": pd.Categorical(["test", "train"] * 2),
        "F": "foo",
    }
)
df2

Unnamed: 0,A,B,C,D,E,F
0,1.0,2013-01-02,1.0,3,test,foo
1,1.0,2013-01-02,1.0,3,train,foo
2,1.0,2013-01-02,1.0,3,test,foo
3,1.0,2013-01-02,1.0,3,train,foo


In [None]:
# The columns of the resulting DataFrame have different dtypes;
# .dtypes reports one dtype per column.
df2.dtypes

A           float64
B    datetime64[ns]
C           float32
D             int32
E          category
F            object
dtype: object

# Viewing data
See the Basics section.

Here is how to view the top and bottom rows of the frame:

In [None]:
df.head(n=5)  # first five rows (5 is also the default)

Unnamed: 0,A,B,C,D
2013-01-01,-1.249748,0.697185,1.5536,0.859551
2013-01-02,1.756773,0.74499,0.917294,-0.804845
2013-01-03,0.306445,0.319013,0.762304,0.003968
2013-01-04,-0.750983,-2.596451,1.257411,-0.420489
2013-01-05,1.083972,-0.809843,1.861297,-0.96316


In [None]:
df.tail(n=3)  # last three rows

Unnamed: 0,A,B,C,D
2013-01-04,-0.750983,-2.596451,1.257411,-0.420489
2013-01-05,1.083972,-0.809843,1.861297,-0.96316
2013-01-06,-0.989817,-2.481632,-1.158652,-1.494048


In [None]:
df.index  # row labels (the index) of the frame

DatetimeIndex(['2013-01-01', '2013-01-02', '2013-01-03', '2013-01-04',
               '2013-01-05', '2013-01-06'],
              dtype='datetime64[ns]', freq='D')

In [None]:
df.columns  # column labels of the frame

Index(['A', 'B', 'C', 'D'], dtype='object')

DataFrame.to_numpy() gives a NumPy representation of the underlying data. Note that this can be an expensive operation when your DataFrame has columns with different data types, which comes down to a fundamental difference between pandas and NumPy: NumPy arrays have one dtype for the entire array, while pandas DataFrames have one dtype per column. When you call DataFrame.to_numpy(), pandas will find the NumPy dtype that can hold all of the dtypes in the DataFrame. This may end up being object, which requires casting every value to a Python object.

Note.

DataFrame.to_numpy() does not include the index or column labels in the output.

In [None]:
# For df, our DataFrame of all floating-point values, DataFrame.to_numpy() is fast
# and doesn't require copying data. The result is a plain 2-D array without the
# index or column labels.
df.to_numpy()

array([[-1.24974818,  0.69718468,  1.5535997 ,  0.85955125],
       [ 1.75677265,  0.74499049,  0.91729363, -0.80484472],
       [ 0.30644534,  0.31901337,  0.76230352,  0.00396843],
       [-0.75098328, -2.59645087,  1.25741068, -0.42048934],
       [ 1.08397196, -0.8098426 ,  1.86129722, -0.96315993],
       [-0.98981734, -2.48163248, -1.15865206, -1.49404761]])

In [None]:
# For df2, the DataFrame with multiple dtypes, DataFrame.to_numpy() is relatively
# expensive: the result falls back to dtype=object, so every value is boxed as a
# Python object.
df2.to_numpy()

array([[1.0, Timestamp('2013-01-02 00:00:00'), 1.0, 3, 'test', 'foo'],
       [1.0, Timestamp('2013-01-02 00:00:00'), 1.0, 3, 'train', 'foo'],
       [1.0, Timestamp('2013-01-02 00:00:00'), 1.0, 3, 'test', 'foo'],
       [1.0, Timestamp('2013-01-02 00:00:00'), 1.0, 3, 'train', 'foo']],
      dtype=object)

In [None]:
# describe() shows a quick statistical summary of your data
# (count, mean, std, min, quartiles, max per column):
df.describe()  # similar to summary() in R

Unnamed: 0,A,B,C,D
count,6.0,6.0,6.0,6.0
mean,0.026107,-0.68779,0.865542,-0.469837
std,1.2212,1.540157,1.07044,0.824476
min,-1.249748,-2.596451,-1.158652,-1.494048
25%,-0.930109,-2.063685,0.801051,-0.923581
50%,-0.222269,-0.245415,1.087352,-0.612667
75%,0.88959,0.602642,1.479552,-0.102146
max,1.756773,0.74499,1.861297,0.859551


In [None]:
# Transposing your data: rows become columns and columns become rows.
df.transpose()

Unnamed: 0,2013-01-01,2013-01-02,2013-01-03,2013-01-04,2013-01-05,2013-01-06
A,-1.249748,1.756773,0.306445,-0.750983,1.083972,-0.989817
B,0.697185,0.74499,0.319013,-2.596451,-0.809843,-2.481632
C,1.5536,0.917294,0.762304,1.257411,1.861297,-1.158652
D,0.859551,-0.804845,0.003968,-0.420489,-0.96316,-1.494048


In [None]:
# Sorting by an axis: order the column labels in descending order (D, C, B, A).
# With ascending=True the column order would be A, B, C, D.
df.sort_index(axis="columns", ascending=False)

Unnamed: 0,D,C,B,A
2013-01-01,0.859551,1.5536,0.697185,-1.249748
2013-01-02,-0.804845,0.917294,0.74499,1.756773
2013-01-03,0.003968,0.762304,0.319013,0.306445
2013-01-04,-0.420489,1.257411,-2.596451,-0.750983
2013-01-05,-0.96316,1.861297,-0.809843,1.083972
2013-01-06,-1.494048,-1.158652,-2.481632,-0.989817


In [None]:
# Sorting by values: order the rows by column "B" from smallest to largest.
# ascending=True is the default and is spelled out here because the sort
# direction is easy to misread; pass ascending=False for largest-first.
df.sort_values(by="B", ascending=True)

Unnamed: 0,A,B,C,D
2013-01-04,-0.750983,-2.596451,1.257411,-0.420489
2013-01-06,-0.989817,-2.481632,-1.158652,-1.494048
2013-01-05,1.083972,-0.809843,1.861297,-0.96316
2013-01-03,0.306445,0.319013,0.762304,0.003968
2013-01-01,-1.249748,0.697185,1.5536,0.859551
2013-01-02,1.756773,0.74499,0.917294,-0.804845


In [None]:
# NOTE(review): stray scratch cell -- a bare integer literal that assigns nothing; candidate for deletion.
333

# Importing pandas
# Getting started and checking your pandas setup
# Difficulty: easy


In [None]:
#1. Import pandas under the alias pd.
# Aliasing uses `import ... as ...`; the previous `import pandas in pd` was a SyntaxError.
import pandas as pd

#2. Print the version of pandas that has been imported.

#3. Print out all the version information of the libraries that are required by the pandas library.