### Python 里面，list/dict/DataFrame/ndarray 都是可变对象。把这些对象作为参数传给函数时要小心：如果函数直接在传入的对象上就地修改（没有先拷贝一份），那么函数调用完后，调用方手里的原始数据也会跟着改变

In [1]:
import numpy as np
import pandas as pd

In [6]:
# After the function call further down, the original df turns out to be changed too.
# The sample 2x3 array is built from a plain nested list: the `np.mat` alias
# used previously was removed in NumPy 2.0, and no matrix detour is needed
# to obtain an ordinary ndarray.
a = np.array([[1, 2, 3], [4, 5, 6]])
df = pd.DataFrame(a, columns=['A', 'B', 'C'])

# dataframe

def deal_with_df_data(df_input):
    """Add column 'D' (column 'B' plus 2) directly onto ``df_input``.

    No copy is taken, so the frame held by the caller gains the new column
    as well. The object returned is ``df_input`` itself, not a new frame.
    """
    # Assigning through the parameter mutates the caller's object in place.
    df_input['D'] = df_input['B'] + 2
    return df_input

# State of df before the call.
print("df's raw data:", df, sep="\n")

# The function returns the object it was given, so df1 and df are the same frame.
df1 = deal_with_df_data(df)

# Both printouts show the extra column 'D'.
print("df1:", df1, sep="\n")
print("df(after deal_with_df_data):", df, sep="\n")

# 函数调用后，初始的a也被改变了

def deal_with_array_data(array_input):
    """Write 100 into element ``[0, 1]`` of ``array_input`` and return it.

    The write goes straight into the array that was passed in, so the
    caller's array is modified too. The returned object is ``array_input``.
    """
    # Item assignment on the parameter edits the caller's array in place.
    array_input[0, 1] = 100
    return array_input

# State of a before the call.
print("a's raw data:", a, sep="\n")

# The function returns the array it was given, so a1 and a are the same object.
a1 = deal_with_array_data(a)

# Both printouts show 100 at position [0, 1].
print("a1:", a1, sep="\n")
print("a(after deal_with_array_data):", a, sep="\n")

df's raw data:
   A  B  C
0  1  2  3
1  4  5  6
df1:
   A  B  C  D
0  1  2  3  4
1  4  5  6  7
df(after deal_with_df_data):
   A  B  C  D
0  1  2  3  4
1  4  5  6  7
a's raw data:
[[1 2 3]
 [4 5 6]]
a1:
[[  1 100   3]
 [  4   5   6]]
a(after deal_with_array_data):
[[  1 100   3]
 [  4   5   6]]


In [3]:
# Revised version of the function below: df is no longer changed by the call.
# The sample 2x3 array is built from a plain nested list: the `np.mat` alias
# used previously was removed in NumPy 2.0, and no matrix detour is needed
# to obtain an ordinary ndarray.
a = np.array([[1, 2, 3], [4, 5, 6]])
df = pd.DataFrame(a, columns=['A', 'B', 'C'])

def deal_with_df_data(df_input):
    """Return a copy of ``df_input`` with an added column 'D' (column 'B' plus 2).

    The work is done on a copy, so the frame held by the caller keeps its
    original columns.
    """
    # Copy first: every later assignment lands on the copy only.
    result = df_input.copy()
    result['D'] = result['B'] + 2
    return result

# State of df before the call.
print("df's raw data:", df, sep="\n")

# The function works on a copy, so df1 is a separate frame from df.
df1 = deal_with_df_data(df)

# df1 carries the new column 'D'; df is printed again for comparison.
print("df1:", df1, sep="\n")
print("df(after deal_with_df_data):", df, sep="\n")

# 修改一下函数，让a不再被改变

def deal_with_array_data(array_input):
    """Return a copy of ``array_input`` whose element ``[0, 1]`` is set to 100.

    The write goes into a freshly copied array, so the caller's array keeps
    its original values.
    """
    # np.array(..., copy=True) always allocates a new array, like np.copy.
    duplicate = np.array(array_input, copy=True)
    duplicate[0, 1] = 100
    return duplicate

# State of a before the call.
print("a's raw data:", a, sep="\n")

# The function works on a copy, so a1 is a separate array from a.
a1 = deal_with_array_data(a)

# a1 holds 100 at position [0, 1]; a is printed again for comparison.
print("a1:", a1, sep="\n")
print("a(after deal_with_array_data):", a, sep="\n")

df's raw data:
   A  B  C
0  1  2  3
1  4  5  6
df1:
   A  B  C  D
0  1  2  3  4
1  4  5  6  7
df(after deal_with_df_data):
   A  B  C
0  1  2  3
1  4  5  6
a's raw data:
[[1 2 3]
 [4 5 6]]
a1:
[[  1 100   3]
 [  4   5   6]]
a(after deal_with_array_data):
[[1 2 3]
 [4 5 6]]


In [4]:
# (Open question) When the whole frame is operated on at once, the problem
# does not seem to appear. Even so, copying first remains the safest habit.
df = pd.DataFrame(a, columns=['A', 'B', 'C'])
print("df's raw data:", df, sep="\n")

def deal_with_data(df_input):
    """Return ``df_input + 2`` (every value increased by 2).

    The addition produces a new object rather than writing into
    ``df_input``, so the caller's data is left as it was.
    """
    return df_input + 2

# df1 receives the result of the whole-frame addition.
df1 = deal_with_data(df)

# df is printed after the call for comparison with its raw data.
print("df1:", df1, sep="\n")
print("df(after deal_with data):", df, sep="\n")

df's raw data:
   A  B  C
0  1  2  3
1  4  5  6
df1:
   A  B  C
0  3  4  5
1  6  7  8
df(after deal_with data):
   A  B  C
0  1  2  3
1  4  5  6


In [7]:
# Plain assignment binds a second name to the same object; nothing is copied.
df1 = df

Unnamed: 0,A,B,C,D
0,1,100,3,4
1,4,5,6,7
